LLVM 15.0.7
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/None.h"
30#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
85#include "llvm/Support/Debug.h"
87#include "llvm/Support/Format.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <utility>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "ppc-lowering"
105
106static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108
109static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisableSCO("disable-ppc-sco",
116cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117
118static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120
121static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123
125 "ppc-quadword-atomics",
126 cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
127 cl::Hidden);
128
129static cl::opt<bool>
130 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
131 cl::desc("disable vector permute decomposition"),
132 cl::init(true), cl::Hidden);
133
135 "disable-auto-paired-vec-st",
136 cl::desc("disable automatically generated 32byte paired vector stores"),
137 cl::init(true), cl::Hidden);
138
139STATISTIC(NumTailCalls, "Number of tail calls");
140STATISTIC(NumSiblingCalls, "Number of sibling calls");
141STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
142STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
143
144static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
145
146static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
147
148static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
149
150// FIXME: Remove this once the bug has been fixed!
152
154 const PPCSubtarget &STI)
155 : TargetLowering(TM), Subtarget(STI) {
156 // Initialize map that relates the PPC addressing modes to the computed flags
157 // of a load/store instruction. The map is used to determine the optimal
158 // addressing mode when selecting load and stores.
159 initializeAddrModeMap();
160 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
161 // arguments are at least 4/8 bytes aligned.
162 bool isPPC64 = Subtarget.isPPC64();
163 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
164
165 // Set up the register classes.
166 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
167 if (!useSoftFloat()) {
168 if (hasSPE()) {
169 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
170 // EFPU2 APU only supports f32
171 if (!Subtarget.hasEFPU2())
172 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
173 } else {
174 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
175 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
176 }
177 }
178
179 // Match BITREVERSE to customized fast code sequence in the td file.
182
183 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
184 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
185
186 // Custom lower inline assembly to check for special registers.
187 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
188 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
189
190 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
191 for (MVT VT : MVT::integer_valuetypes()) {
194 }
195
196 if (Subtarget.isISA3_0()) {
201 } else {
202 // No extending loads from f16 or HW conversions back and forth.
204 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
205 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
207 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
208 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
211 }
212
214
215 // PowerPC has pre-inc load and store's.
226 if (!Subtarget.hasSPE()) {
231 }
232
233 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
234 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
235 for (MVT VT : ScalarIntVTs) {
240 }
241
242 if (Subtarget.useCRBits()) {
244
245 if (isPPC64 || Subtarget.hasFPCVT()) {
248 isPPC64 ? MVT::i64 : MVT::i32);
251 isPPC64 ? MVT::i64 : MVT::i32);
252
255 isPPC64 ? MVT::i64 : MVT::i32);
258 isPPC64 ? MVT::i64 : MVT::i32);
259
262 isPPC64 ? MVT::i64 : MVT::i32);
265 isPPC64 ? MVT::i64 : MVT::i32);
266
269 isPPC64 ? MVT::i64 : MVT::i32);
272 isPPC64 ? MVT::i64 : MVT::i32);
273 } else {
278 }
279
280 // PowerPC does not support direct load/store of condition registers.
281 setOperationAction(ISD::LOAD, MVT::i1, Custom);
282 setOperationAction(ISD::STORE, MVT::i1, Custom);
283
284 // FIXME: Remove this once the ANDI glue bug is fixed:
285 if (ANDIGlueBug)
287
288 for (MVT VT : MVT::integer_valuetypes()) {
292 }
293
294 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
295 }
296
297 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
298 // PPC (the libcall is not available).
303
304 // We do not currently implement these libm ops for PowerPC.
309 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
311
312 // PowerPC has no SREM/UREM instructions unless we are on P9
313 // On P9 we may use a hardware instruction to compute the remainder.
314 // When the result of both the remainder and the division is required it is
315 // more efficient to compute the remainder from the result of the division
316 // rather than use the remainder instruction. The instructions are legalized
317 // directly because the DivRemPairsPass performs the transformation at the IR
318 // level.
319 if (Subtarget.isISA3_0()) {
324 } else {
329 }
330
331 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
340
341 // Handle constrained floating-point operations of scalar.
342 // TODO: Handle SPE specific operation.
348
353
354 if (!Subtarget.hasSPE()) {
357 }
358
359 if (Subtarget.hasVSX()) {
362 }
363
364 if (Subtarget.hasFSQRT()) {
367 }
368
369 if (Subtarget.hasFPRND()) {
374
379 }
380
381 // We don't support sin/cos/sqrt/fmod/pow
382 setOperationAction(ISD::FSIN , MVT::f64, Expand);
383 setOperationAction(ISD::FCOS , MVT::f64, Expand);
384 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
386 setOperationAction(ISD::FPOW , MVT::f64, Expand);
387 setOperationAction(ISD::FSIN , MVT::f32, Expand);
388 setOperationAction(ISD::FCOS , MVT::f32, Expand);
389 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
391 setOperationAction(ISD::FPOW , MVT::f32, Expand);
392
393 // MASS transformation for LLVM intrinsics with replicating fast-math flag
394 // to be consistent to PPCGenScalarMASSEntries pass
395 if (TM.getOptLevel() == CodeGenOpt::Aggressive) {
396 setOperationAction(ISD::FSIN , MVT::f64, Custom);
397 setOperationAction(ISD::FCOS , MVT::f64, Custom);
398 setOperationAction(ISD::FPOW , MVT::f64, Custom);
400 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
402 setOperationAction(ISD::FSIN , MVT::f32, Custom);
403 setOperationAction(ISD::FCOS , MVT::f32, Custom);
404 setOperationAction(ISD::FPOW , MVT::f32, Custom);
406 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
408 }
409
410 if (Subtarget.hasSPE()) {
413 } else {
416 }
417
418 if (Subtarget.hasSPE())
420
422
423 // If we're enabling GP optimizations, use hardware square root
424 if (!Subtarget.hasFSQRT() &&
425 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
426 Subtarget.hasFRE()))
427 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
428
429 if (!Subtarget.hasFSQRT() &&
430 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
431 Subtarget.hasFRES()))
432 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
433
434 if (Subtarget.hasFCPSGN()) {
437 } else {
440 }
441
442 if (Subtarget.hasFPRND()) {
443 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
444 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
445 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
446 setOperationAction(ISD::FROUND, MVT::f64, Legal);
447
448 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
449 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
450 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
451 setOperationAction(ISD::FROUND, MVT::f32, Legal);
452 }
453
454 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
455 // to speed up scalar BSWAP64.
456 // CTPOP or CTTZ were introduced in P8/P9 respectively
458 if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
460 else
462 if (Subtarget.isISA3_0()) {
465 } else {
468 }
469
470 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
473 } else {
476 }
477
478 // PowerPC does not have ROTR
481
482 if (!Subtarget.useCRBits()) {
483 // PowerPC does not have Select
488 }
489
490 // PowerPC wants to turn select_cc of FP into fsel when possible.
493
494 // PowerPC wants to optimize integer setcc a bit
495 if (!Subtarget.useCRBits())
497
498 if (Subtarget.hasFPU()) {
502
506 }
507
508 // PowerPC does not have BRCOND which requires SetCC
509 if (!Subtarget.useCRBits())
510 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
511
513
514 if (Subtarget.hasSPE()) {
515 // SPE has built-in conversions
522
523 // SPE supports signaling compare of f32/f64.
526 } else {
527 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
530
531 // PowerPC does not have [U|S]INT_TO_FP
536 }
537
538 if (Subtarget.hasDirectMove() && isPPC64) {
539 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
540 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
541 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
542 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
543 if (TM.Options.UnsafeFPMath) {
544 setOperationAction(ISD::LRINT, MVT::f64, Legal);
545 setOperationAction(ISD::LRINT, MVT::f32, Legal);
546 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
547 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
548 setOperationAction(ISD::LROUND, MVT::f64, Legal);
549 setOperationAction(ISD::LROUND, MVT::f32, Legal);
550 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
551 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
552 }
553 } else {
554 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
555 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
556 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
557 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
558 }
559
560 // We cannot sextinreg(i1). Expand to shifts.
562
563 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
564 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
565 // support continuation, user-level threading, and etc.. As a result, no
566 // other SjLj exception interfaces are implemented and please don't build
567 // your own exception handling based on them.
568 // LLVM/Clang supports zero-cost DWARF exception handling.
571
572 // We want to legalize GlobalAddress and ConstantPool nodes into the
573 // appropriate instructions to materialize the address.
584
585 // TRAP is legal.
587
588 // TRAMPOLINE is custom lowered.
589 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
590 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
591
592 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
593 setOperationAction(ISD::VASTART , MVT::Other, Custom);
594
595 if (Subtarget.is64BitELFABI()) {
596 // VAARG always uses double-word chunks, so promote anything smaller.
597 setOperationAction(ISD::VAARG, MVT::i1, Promote);
598 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
599 setOperationAction(ISD::VAARG, MVT::i8, Promote);
600 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
601 setOperationAction(ISD::VAARG, MVT::i16, Promote);
602 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
603 setOperationAction(ISD::VAARG, MVT::i32, Promote);
604 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
606 } else if (Subtarget.is32BitELFABI()) {
607 // VAARG is custom lowered with the 32-bit SVR4 ABI.
609 setOperationAction(ISD::VAARG, MVT::i64, Custom);
610 } else
612
613 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
614 if (Subtarget.is32BitELFABI())
615 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
616 else
617 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
618
619 // Use the default implementation.
620 setOperationAction(ISD::VAEND , MVT::Other, Expand);
621 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
622 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
623 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
624 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
625 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
626 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
629
630 // We want to custom lower some of our intrinsics.
636
637 // To handle counter-based loop conditions.
639
644
645 // Comparisons that require checking two conditions.
646 if (Subtarget.hasSPE()) {
651 }
664
667
668 if (Subtarget.has64BitSupport()) {
669 // They also have instructions for converting between i64 and fp.
678 // This is just the low 32 bits of a (signed) fp->i64 conversion.
679 // We cannot do this with Promote because i64 is not a legal type.
682
683 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
686 }
687 } else {
688 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
689 if (Subtarget.hasSPE()) {
692 } else {
695 }
696 }
697
698 // With the instructions enabled under FPCVT, we can do everything.
699 if (Subtarget.hasFPCVT()) {
700 if (Subtarget.has64BitSupport()) {
709 }
710
719 }
720
721 if (Subtarget.use64BitRegs()) {
722 // 64-bit PowerPC implementations can support i64 types directly
723 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
724 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
726 // 64-bit PowerPC wants to expand i128 shifts itself.
730 } else {
731 // 32-bit PowerPC wants to expand i64 shifts itself.
735 }
736
737 // PowerPC has better expansions for funnel shifts than the generic
738 // TargetLowering::expandFunnelShift.
739 if (Subtarget.has64BitSupport()) {
742 }
745
746 if (Subtarget.hasVSX()) {
747 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
748 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
749 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
750 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
751 }
752
753 if (Subtarget.hasAltivec()) {
754 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
759 }
760 // First set operation action for all vector types to expand. Then we
761 // will selectively turn on ones that can be effectively codegen'd.
763 // add/sub are legal for all supported vector VT's.
766
767 // For v2i64, these are only valid with P8Vector. This is corrected after
768 // the loop.
769 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
774 }
775 else {
780 }
781
782 if (Subtarget.hasVSX()) {
783 setOperationAction(ISD::FMAXNUM, VT, Legal);
784 setOperationAction(ISD::FMINNUM, VT, Legal);
785 }
786
787 // Vector instructions introduced in P8
788 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
791 }
792 else {
795 }
796
797 // Vector instructions introduced in P9
798 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
800 else
802
803 // We promote all shuffles to v16i8.
806
807 // We promote all non-typed operations to v4i32.
814 setOperationAction(ISD::LOAD , VT, Promote);
815 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
821 setOperationAction(ISD::STORE, VT, Promote);
822 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
823
824 // No other operations are legal.
832 setOperationAction(ISD::FNEG, VT, Expand);
833 setOperationAction(ISD::FSQRT, VT, Expand);
834 setOperationAction(ISD::FLOG, VT, Expand);
835 setOperationAction(ISD::FLOG10, VT, Expand);
836 setOperationAction(ISD::FLOG2, VT, Expand);
837 setOperationAction(ISD::FEXP, VT, Expand);
838 setOperationAction(ISD::FEXP2, VT, Expand);
839 setOperationAction(ISD::FSIN, VT, Expand);
840 setOperationAction(ISD::FCOS, VT, Expand);
841 setOperationAction(ISD::FABS, VT, Expand);
842 setOperationAction(ISD::FFLOOR, VT, Expand);
843 setOperationAction(ISD::FCEIL, VT, Expand);
844 setOperationAction(ISD::FTRUNC, VT, Expand);
845 setOperationAction(ISD::FRINT, VT, Expand);
846 setOperationAction(ISD::FNEARBYINT, VT, Expand);
857 setOperationAction(ISD::FPOW, VT, Expand);
862
863 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
864 setTruncStoreAction(VT, InnerVT, Expand);
867 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
868 }
869 }
871 if (!Subtarget.hasP8Vector()) {
876 }
877
878 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
879 // with merges, splats, etc.
881
882 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
883 // are cheap, so handle them before they get expanded to scalar.
889
893 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
895 Subtarget.useCRBits() ? Legal : Expand);
896 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
905 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
906 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
907 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
908 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
909
910 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
912 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
913 if (Subtarget.hasAltivec())
914 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
916 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
917 if (Subtarget.hasP8Altivec())
919
920 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
921 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
922 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
923 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
924
927
928 if (Subtarget.hasVSX()) {
930 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
932 }
933
934 if (Subtarget.hasP8Altivec())
936 else
938
939 if (Subtarget.isISA3_1()) {
958 }
959
962
965
970
971 // Altivec does not contain unordered floating-point compare instructions
976
977 if (Subtarget.hasVSX()) {
980 if (Subtarget.hasP8Vector()) {
983 }
984 if (Subtarget.hasDirectMove() && isPPC64) {
993 }
995
996 // The nearbyint variants are not allowed to raise the inexact exception
997 // so we can only code-gen them with unsafe math.
998 if (TM.Options.UnsafeFPMath) {
999 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1000 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1001 }
1002
1003 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1004 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1005 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1006 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1007 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1008 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1009 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1010 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1011
1012 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1013 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1014 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1015 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1016 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1017
1020
1022 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1023
1024 // Share the Altivec comparison restrictions.
1029
1030 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1031 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1032
1034
1035 if (Subtarget.hasP8Vector())
1036 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1037
1038 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1039
1040 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1041 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1042 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1043
1044 if (Subtarget.hasP8Altivec()) {
1048
1049 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1050 // SRL, but not for SRA because of the instructions available:
1051 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1052 // doing
1056
1058 }
1059 else {
1063
1065
1066 // VSX v2i64 only supports non-arithmetic operations.
1069 }
1070
1071 if (Subtarget.isISA3_1())
1073 else
1075
1080
1082
1091
1092 // Custom handling for partial vectors of integers converted to
1093 // floating point. We already have optimal handling for v2i32 through
1094 // the DAG combine, so those aren't necessary.
1111
1112 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1113 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1114 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1115 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1118
1121
1122 // Handle constrained floating-point operations of vector.
1123 // The predictor is `hasVSX` because altivec instruction has
1124 // no exception but VSX vector instruction has.
1138
1152
1153 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1154 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1155
1156 for (MVT FPT : MVT::fp_valuetypes())
1158
1159 // Expand the SELECT to SELECT_CC
1161
1164
1165 // No implementation for these ops for PowerPC.
1166 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1167 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1168 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1169 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1171 }
1172
1173 if (Subtarget.hasP8Altivec()) {
1174 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1175 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1176 }
1177
1178 if (Subtarget.hasP9Vector()) {
1181
1182 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1183 // SRL, but not for SRA because of the instructions available:
1184 // VS{RL} and VS{RL}O.
1188
1193 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1194
1202
1203 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1204 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1205 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1206 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1207 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1208 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1209
1212 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1213
1214 // Handle constrained floating-point operations of fp128
1230 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1235 } else if (Subtarget.hasVSX()) {
1237 setOperationAction(ISD::STORE, MVT::f128, Promote);
1238
1241
1242 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1243 // fp_to_uint and int_to_fp.
1246
1249 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1250 setOperationAction(ISD::FABS, MVT::f128, Expand);
1251 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1254
1255 // Expand the fp_extend if the target type is fp128.
1256 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1258
1259 // Expand the fp_round if the source type is fp128.
1260 for (MVT VT : {MVT::f32, MVT::f64}) {
1263 }
1264
1268 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1269
1270 // Lower following f128 select_cc pattern:
1271 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1273
1274 // We need to handle f128 SELECT_CC with integer result type.
1277 }
1278
1279 if (Subtarget.hasP9Altivec()) {
1280 if (Subtarget.isISA3_1()) {
1285 } else {
1288 }
1296 }
1297
1298 if (Subtarget.hasP10Vector()) {
1300 }
1301 }
1302
1303 if (Subtarget.pairedVectorMemops()) {
1304 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1307 }
1308 if (Subtarget.hasMMA()) {
1309 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1313 }
1314
1315 if (Subtarget.has64BitSupport())
1316 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1317
1318 if (Subtarget.isISA3_1())
1320
1321 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1322
1323 if (!isPPC64) {
1324 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1325 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1326 }
1327
1329 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1330 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1332 }
1333
1335
1336 if (Subtarget.hasAltivec()) {
1337 // Altivec instructions set fields to all zeros or all ones.
1339 }
1340
1341 setLibcallName(RTLIB::MULO_I128, nullptr);
1342 if (!isPPC64) {
1343 // These libcalls are not available in 32-bit.
1344 setLibcallName(RTLIB::SHL_I128, nullptr);
1345 setLibcallName(RTLIB::SRL_I128, nullptr);
1346 setLibcallName(RTLIB::SRA_I128, nullptr);
1347 setLibcallName(RTLIB::MUL_I128, nullptr);
1348 setLibcallName(RTLIB::MULO_I64, nullptr);
1349 }
1350
1351 if (!isPPC64)
1353 else if (shouldInlineQuadwordAtomics())
1355 else
1357
1358 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1359
1360 // We have target-specific dag combine patterns for the following nodes:
1363 if (Subtarget.hasFPCVT())
1365 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1366 if (Subtarget.useCRBits())
1367 setTargetDAGCombine(ISD::BRCOND);
1370
1372
1374
1375 if (Subtarget.useCRBits()) {
1377 }
1378
1379 if (Subtarget.hasP9Altivec()) {
1381 }
1382
1383 setLibcallName(RTLIB::LOG_F128, "logf128");
1384 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1385 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1386 setLibcallName(RTLIB::EXP_F128, "expf128");
1387 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1388 setLibcallName(RTLIB::SIN_F128, "sinf128");
1389 setLibcallName(RTLIB::COS_F128, "cosf128");
1390 setLibcallName(RTLIB::POW_F128, "powf128");
1391 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1392 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1393 setLibcallName(RTLIB::REM_F128, "fmodf128");
1394 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1395 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1396 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1397 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1398 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1399 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1400 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1401 setLibcallName(RTLIB::RINT_F128, "rintf128");
1402 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1403 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1404 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1405 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1406
1407 // With 32 condition bits, we don't need to sink (and duplicate) compares
1408 // aggressively in CodeGenPrep.
1409 if (Subtarget.useCRBits()) {
1412 }
1413
1415
1416 switch (Subtarget.getCPUDirective()) {
1417 default: break;
1418 case PPC::DIR_970:
1419 case PPC::DIR_A2:
1420 case PPC::DIR_E500:
1421 case PPC::DIR_E500mc:
1422 case PPC::DIR_E5500:
1423 case PPC::DIR_PWR4:
1424 case PPC::DIR_PWR5:
1425 case PPC::DIR_PWR5X:
1426 case PPC::DIR_PWR6:
1427 case PPC::DIR_PWR6X:
1428 case PPC::DIR_PWR7:
1429 case PPC::DIR_PWR8:
1430 case PPC::DIR_PWR9:
1431 case PPC::DIR_PWR10:
1435 break;
1436 }
1437
1438 if (Subtarget.enableMachineScheduler())
1440 else
1442
1444
1445 // The Freescale cores do better with aggressive inlining of memcpy and
1446 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1447 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1448 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1449 MaxStoresPerMemset = 32;
1451 MaxStoresPerMemcpy = 32;
1455 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1456 // The A2 also benefits from (very) aggressive inlining of memcpy and
1457 // friends. The overhead of a the function call, even when warm, can be
1458 // over one hundred cycles.
1459 MaxStoresPerMemset = 128;
1460 MaxStoresPerMemcpy = 128;
1461 MaxStoresPerMemmove = 128;
1462 MaxLoadsPerMemcmp = 128;
1463 } else {
1466 }
1467
1468 IsStrictFPEnabled = true;
1469
1470 // Let the subtarget (CPU) decide if a predictable select is more expensive
1471 // than the corresponding branch. This information is used in CGP to decide
1472 // when to convert selects into branches.
1474}
1475
1476// *********************************** NOTE ************************************
1477// For selecting load and store instructions, the addressing modes are defined
1478// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1479// patterns to match the load the store instructions.
1480//
1481// The TD definitions for the addressing modes correspond to their respective
1482// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1483// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1484// address mode flags of a particular node. Afterwards, the computed address
1485// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1486// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1487// accordingly, based on the preferred addressing mode.
1488//
1489// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1490// MemOpFlags contains all the possible flags that can be used to compute the
1491// optimal addressing mode for load and store instructions.
1492// AddrMode contains all the possible load and store addressing modes available
1493// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1494//
1495// When adding new load and store instructions, it is possible that new address
1496// flags may need to be added into MemOpFlags, and a new addressing mode will
1497// need to be added to AddrMode. An entry of the new addressing mode (consisting
1498// of the minimal and main distinguishing address flags for the new load/store
1499// instructions) will need to be added into initializeAddrModeMap() below.
1500// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1501// need to be updated to account for selecting the optimal addressing mode.
1502// *****************************************************************************
1503/// Initialize the map that relates the different addressing modes of the load
1504/// and store instructions to a set of flags. This ensures the load/store
1505/// instruction is correctly matched during instruction selection.
1506void PPCTargetLowering::initializeAddrModeMap() {
1507 AddrModesMap[PPC::AM_DForm] = {
1508 // LWZ, STW
1513 // LBZ, LHZ, STB, STH
1518 // LHA
1523 // LFS, LFD, STFS, STFD
1528 };
1529 AddrModesMap[PPC::AM_DSForm] = {
1530 // LWA
1534 // LD, STD
1538 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1542 };
1543 AddrModesMap[PPC::AM_DQForm] = {
1544 // LXV, STXV
1548 };
1549 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1551 // TODO: Add mapping for quadword load/store.
1552}
1553
1554/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1555/// the desired ByVal argument alignment.
1556static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1557 if (MaxAlign == MaxMaxAlign)
1558 return;
1559 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1560 if (MaxMaxAlign >= 32 &&
1561 VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1562 MaxAlign = Align(32);
1563 else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1564 MaxAlign < 16)
1565 MaxAlign = Align(16);
1566 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1567 Align EltAlign;
1568 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1569 if (EltAlign > MaxAlign)
1570 MaxAlign = EltAlign;
1571 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1572 for (auto *EltTy : STy->elements()) {
1573 Align EltAlign;
1574 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1575 if (EltAlign > MaxAlign)
1576 MaxAlign = EltAlign;
1577 if (MaxAlign == MaxMaxAlign)
1578 break;
1579 }
1580 }
1581}
1582
1583/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1584/// function arguments in the caller parameter area.
1586 const DataLayout &DL) const {
1587 // 16byte and wider vectors are passed on 16byte boundary.
1588 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1589 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1590 if (Subtarget.hasAltivec())
1591 getMaxByValAlign(Ty, Alignment, Align(16));
1592 return Alignment.value();
1593}
1594
1596 return Subtarget.useSoftFloat();
1597}
1598
1600 return Subtarget.hasSPE();
1601}
1602
1604 return VT.isScalarInteger();
1605}
1606
1607const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1608 switch ((PPCISD::NodeType)Opcode) {
1609 case PPCISD::FIRST_NUMBER: break;
1610 case PPCISD::FSEL: return "PPCISD::FSEL";
1611 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1612 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1613 case PPCISD::FCFID: return "PPCISD::FCFID";
1614 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1615 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1616 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1617 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1618 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1619 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1620 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1622 return "PPCISD::FP_TO_UINT_IN_VSR,";
1624 return "PPCISD::FP_TO_SINT_IN_VSR";
1625 case PPCISD::FRE: return "PPCISD::FRE";
1626 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1627 case PPCISD::FTSQRT:
1628 return "PPCISD::FTSQRT";
1629 case PPCISD::FSQRT:
1630 return "PPCISD::FSQRT";
1631 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1632 case PPCISD::VPERM: return "PPCISD::VPERM";
1633 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1635 return "PPCISD::XXSPLTI_SP_TO_DP";
1637 return "PPCISD::XXSPLTI32DX";
1638 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1639 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1640 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1641 case PPCISD::CMPB: return "PPCISD::CMPB";
1642 case PPCISD::Hi: return "PPCISD::Hi";
1643 case PPCISD::Lo: return "PPCISD::Lo";
1644 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1645 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1646 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1647 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1648 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1649 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1650 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1651 case PPCISD::SRL: return "PPCISD::SRL";
1652 case PPCISD::SRA: return "PPCISD::SRA";
1653 case PPCISD::SHL: return "PPCISD::SHL";
1654 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1655 case PPCISD::CALL: return "PPCISD::CALL";
1656 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1657 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1658 case PPCISD::CALL_RM:
1659 return "PPCISD::CALL_RM";
1661 return "PPCISD::CALL_NOP_RM";
1663 return "PPCISD::CALL_NOTOC_RM";
1664 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1665 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1666 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1667 case PPCISD::BCTRL_RM:
1668 return "PPCISD::BCTRL_RM";
1670 return "PPCISD::BCTRL_LOAD_TOC_RM";
1671 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1672 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1673 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1674 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1675 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1676 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1677 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1678 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1679 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1680 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1682 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1684 return "PPCISD::ANDI_rec_1_EQ_BIT";
1686 return "PPCISD::ANDI_rec_1_GT_BIT";
1687 case PPCISD::VCMP: return "PPCISD::VCMP";
1688 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1689 case PPCISD::LBRX: return "PPCISD::LBRX";
1690 case PPCISD::STBRX: return "PPCISD::STBRX";
1691 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1692 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1693 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1694 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1695 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1696 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1697 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1698 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1699 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1701 return "PPCISD::ST_VSR_SCAL_INT";
1702 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1703 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1704 case PPCISD::BDZ: return "PPCISD::BDZ";
1705 case PPCISD::MFFS: return "PPCISD::MFFS";
1706 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1707 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1708 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1709 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1710 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1711 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1712 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1713 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1714 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1715 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1716 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1717 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1718 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1719 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1720 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1721 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1722 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1723 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1724 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1725 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1727 return "PPCISD::PADDI_DTPREL";
1728 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1729 case PPCISD::SC: return "PPCISD::SC";
1730 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1731 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1732 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1733 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1734 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1735 case PPCISD::VABSD: return "PPCISD::VABSD";
1736 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1737 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1738 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1739 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1740 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1741 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1742 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1744 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1746 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1747 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1748 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1749 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1750 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1751 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1752 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1753 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1754 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1756 return "PPCISD::STRICT_FADDRTZ";
1758 return "PPCISD::STRICT_FCTIDZ";
1760 return "PPCISD::STRICT_FCTIWZ";
1762 return "PPCISD::STRICT_FCTIDUZ";
1764 return "PPCISD::STRICT_FCTIWUZ";
1766 return "PPCISD::STRICT_FCFID";
1768 return "PPCISD::STRICT_FCFIDU";
1770 return "PPCISD::STRICT_FCFIDS";
1772 return "PPCISD::STRICT_FCFIDUS";
1773 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1774 }
1775 return nullptr;
1776}
1777
1779 EVT VT) const {
1780 if (!VT.isVector())
1781 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1782
1784}
1785
1787 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1788 return true;
1789}
1790
1791//===----------------------------------------------------------------------===//
1792// Node matching predicates, for use by the tblgen matching code.
1793//===----------------------------------------------------------------------===//
1794
1795/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1798 return CFP->getValueAPF().isZero();
1799 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1800 // Maybe this has already been legalized into the constant pool?
1801 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1802 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1803 return CFP->getValueAPF().isZero();
1804 }
1805 return false;
1806}
1807
1808/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1809/// true if Op is undef or if it matches the specified value.
1810static bool isConstantOrUndef(int Op, int Val) {
1811 return Op < 0 || Op == Val;
1812}
1813
1814/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1815/// VPKUHUM instruction.
1816/// The ShuffleKind distinguishes between big-endian operations with
1817/// two different inputs (0), either-endian operations with two identical
1818/// inputs (1), and little-endian operations with two different inputs (2).
1819/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1821 SelectionDAG &DAG) {
1822 bool IsLE = DAG.getDataLayout().isLittleEndian();
1823 if (ShuffleKind == 0) {
1824 if (IsLE)
1825 return false;
1826 for (unsigned i = 0; i != 16; ++i)
1827 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1828 return false;
1829 } else if (ShuffleKind == 2) {
1830 if (!IsLE)
1831 return false;
1832 for (unsigned i = 0; i != 16; ++i)
1833 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1834 return false;
1835 } else if (ShuffleKind == 1) {
1836 unsigned j = IsLE ? 0 : 1;
1837 for (unsigned i = 0; i != 8; ++i)
1838 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1839 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1840 return false;
1841 }
1842 return true;
1843}
1844
1845/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1846/// VPKUWUM instruction.
1847/// The ShuffleKind distinguishes between big-endian operations with
1848/// two different inputs (0), either-endian operations with two identical
1849/// inputs (1), and little-endian operations with two different inputs (2).
1850/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1852 SelectionDAG &DAG) {
1853 bool IsLE = DAG.getDataLayout().isLittleEndian();
1854 if (ShuffleKind == 0) {
1855 if (IsLE)
1856 return false;
1857 for (unsigned i = 0; i != 16; i += 2)
1858 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1859 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1860 return false;
1861 } else if (ShuffleKind == 2) {
1862 if (!IsLE)
1863 return false;
1864 for (unsigned i = 0; i != 16; i += 2)
1865 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1866 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1867 return false;
1868 } else if (ShuffleKind == 1) {
1869 unsigned j = IsLE ? 0 : 2;
1870 for (unsigned i = 0; i != 8; i += 2)
1871 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1872 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1873 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1874 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1875 return false;
1876 }
1877 return true;
1878}
1879
1880/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1881/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1882/// current subtarget.
1883///
1884/// The ShuffleKind distinguishes between big-endian operations with
1885/// two different inputs (0), either-endian operations with two identical
1886/// inputs (1), and little-endian operations with two different inputs (2).
1887/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1889 SelectionDAG &DAG) {
1890 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1891 if (!Subtarget.hasP8Vector())
1892 return false;
1893
1894 bool IsLE = DAG.getDataLayout().isLittleEndian();
1895 if (ShuffleKind == 0) {
1896 if (IsLE)
1897 return false;
1898 for (unsigned i = 0; i != 16; i += 4)
1899 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1900 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1901 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1902 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1903 return false;
1904 } else if (ShuffleKind == 2) {
1905 if (!IsLE)
1906 return false;
1907 for (unsigned i = 0; i != 16; i += 4)
1908 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1909 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1910 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1911 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1912 return false;
1913 } else if (ShuffleKind == 1) {
1914 unsigned j = IsLE ? 0 : 4;
1915 for (unsigned i = 0; i != 8; i += 4)
1916 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1917 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1918 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1919 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1920 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1921 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1922 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1923 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1924 return false;
1925 }
1926 return true;
1927}
1928
1929/// isVMerge - Common function, used to match vmrg* shuffles.
1930///
1931static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1932 unsigned LHSStart, unsigned RHSStart) {
1933 if (N->getValueType(0) != MVT::v16i8)
1934 return false;
1935 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1936 "Unsupported merge size!");
1937
1938 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1939 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1940 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1941 LHSStart+j+i*UnitSize) ||
1942 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1943 RHSStart+j+i*UnitSize))
1944 return false;
1945 }
1946 return true;
1947}
1948
1949/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1950/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1951/// The ShuffleKind distinguishes between big-endian merges with two
1952/// different inputs (0), either-endian merges with two identical inputs (1),
1953/// and little-endian merges with two different inputs (2). For the latter,
1954/// the input operands are swapped (see PPCInstrAltivec.td).
1956 unsigned ShuffleKind, SelectionDAG &DAG) {
1957 if (DAG.getDataLayout().isLittleEndian()) {
1958 if (ShuffleKind == 1) // unary
1959 return isVMerge(N, UnitSize, 0, 0);
1960 else if (ShuffleKind == 2) // swapped
1961 return isVMerge(N, UnitSize, 0, 16);
1962 else
1963 return false;
1964 } else {
1965 if (ShuffleKind == 1) // unary
1966 return isVMerge(N, UnitSize, 8, 8);
1967 else if (ShuffleKind == 0) // normal
1968 return isVMerge(N, UnitSize, 8, 24);
1969 else
1970 return false;
1971 }
1972}
1973
1974/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1975/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1976/// The ShuffleKind distinguishes between big-endian merges with two
1977/// different inputs (0), either-endian merges with two identical inputs (1),
1978/// and little-endian merges with two different inputs (2). For the latter,
1979/// the input operands are swapped (see PPCInstrAltivec.td).
1981 unsigned ShuffleKind, SelectionDAG &DAG) {
1982 if (DAG.getDataLayout().isLittleEndian()) {
1983 if (ShuffleKind == 1) // unary
1984 return isVMerge(N, UnitSize, 8, 8);
1985 else if (ShuffleKind == 2) // swapped
1986 return isVMerge(N, UnitSize, 8, 24);
1987 else
1988 return false;
1989 } else {
1990 if (ShuffleKind == 1) // unary
1991 return isVMerge(N, UnitSize, 0, 0);
1992 else if (ShuffleKind == 0) // normal
1993 return isVMerge(N, UnitSize, 0, 16);
1994 else
1995 return false;
1996 }
1997}
1998
1999/**
2000 * Common function used to match vmrgew and vmrgow shuffles
2001 *
2002 * The indexOffset determines whether to look for even or odd words in
2003 * the shuffle mask. This is based on the of the endianness of the target
2004 * machine.
2005 * - Little Endian:
2006 * - Use offset of 0 to check for odd elements
2007 * - Use offset of 4 to check for even elements
2008 * - Big Endian:
2009 * - Use offset of 0 to check for even elements
2010 * - Use offset of 4 to check for odd elements
2011 * A detailed description of the vector element ordering for little endian and
2012 * big endian can be found at
2013 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2014 * Targeting your applications - what little endian and big endian IBM XL C/C++
2015 * compiler differences mean to you
2016 *
2017 * The mask to the shuffle vector instruction specifies the indices of the
2018 * elements from the two input vectors to place in the result. The elements are
2019 * numbered in array-access order, starting with the first vector. These vectors
2020 * are always of type v16i8, thus each vector will contain 16 elements of size
2021 * 8. More info on the shuffle vector can be found in the
2022 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2023 * Language Reference.
2024 *
2025 * The RHSStartValue indicates whether the same input vectors are used (unary)
2026 * or two different input vectors are used, based on the following:
2027 * - If the instruction uses the same vector for both inputs, the range of the
2028 * indices will be 0 to 15. In this case, the RHSStart value passed should
2029 * be 0.
2030 * - If the instruction has two different vectors then the range of the
2031 * indices will be 0 to 31. In this case, the RHSStart value passed should
2032 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2033 * to 31 specify elements in the second vector).
2034 *
2035 * \param[in] N The shuffle vector SD Node to analyze
2036 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2037 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2038 * vector to the shuffle_vector instruction
2039 * \return true iff this shuffle vector represents an even or odd word merge
2040 */
2041static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2042 unsigned RHSStartValue) {
2043 if (N->getValueType(0) != MVT::v16i8)
2044 return false;
2045
2046 for (unsigned i = 0; i < 2; ++i)
2047 for (unsigned j = 0; j < 4; ++j)
2048 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2049 i*RHSStartValue+j+IndexOffset) ||
2050 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2051 i*RHSStartValue+j+IndexOffset+8))
2052 return false;
2053 return true;
2054}
2055
2056/**
2057 * Determine if the specified shuffle mask is suitable for the vmrgew or
2058 * vmrgow instructions.
2059 *
2060 * \param[in] N The shuffle vector SD Node to analyze
2061 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2062 * \param[in] ShuffleKind Identify the type of merge:
2063 * - 0 = big-endian merge with two different inputs;
2064 * - 1 = either-endian merge with two identical inputs;
2065 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2066 * little-endian merges).
2067 * \param[in] DAG The current SelectionDAG
2068 * \return true iff this shuffle mask
2069 */
2071 unsigned ShuffleKind, SelectionDAG &DAG) {
2072 if (DAG.getDataLayout().isLittleEndian()) {
2073 unsigned indexOffset = CheckEven ? 4 : 0;
2074 if (ShuffleKind == 1) // Unary
2075 return isVMerge(N, indexOffset, 0);
2076 else if (ShuffleKind == 2) // swapped
2077 return isVMerge(N, indexOffset, 16);
2078 else
2079 return false;
2080 }
2081 else {
2082 unsigned indexOffset = CheckEven ? 0 : 4;
2083 if (ShuffleKind == 1) // Unary
2084 return isVMerge(N, indexOffset, 0);
2085 else if (ShuffleKind == 0) // Normal
2086 return isVMerge(N, indexOffset, 16);
2087 else
2088 return false;
2089 }
2090 return false;
2091}
2092
2093/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2094/// amount, otherwise return -1.
2095/// The ShuffleKind distinguishes between big-endian operations with two
2096/// different inputs (0), either-endian operations with two identical inputs
2097/// (1), and little-endian operations with two different inputs (2). For the
2098/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2099int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2100 SelectionDAG &DAG) {
2101 if (N->getValueType(0) != MVT::v16i8)
2102 return -1;
2103
2105
2106 // Find the first non-undef value in the shuffle mask.
2107 unsigned i;
2108 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2109 /*search*/;
2110
2111 if (i == 16) return -1; // all undef.
2112
2113 // Otherwise, check to see if the rest of the elements are consecutively
2114 // numbered from this value.
2115 unsigned ShiftAmt = SVOp->getMaskElt(i);
2116 if (ShiftAmt < i) return -1;
2117
2118 ShiftAmt -= i;
2119 bool isLE = DAG.getDataLayout().isLittleEndian();
2120
2121 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2122 // Check the rest of the elements to see if they are consecutive.
2123 for (++i; i != 16; ++i)
2124 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2125 return -1;
2126 } else if (ShuffleKind == 1) {
2127 // Check the rest of the elements to see if they are consecutive.
2128 for (++i; i != 16; ++i)
2129 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2130 return -1;
2131 } else
2132 return -1;
2133
2134 if (isLE)
2135 ShiftAmt = 16 - ShiftAmt;
2136
2137 return ShiftAmt;
2138}
2139
2140/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2141/// specifies a splat of a single element that is suitable for input to
2142/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2144 EVT VT = N->getValueType(0);
2145 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2146 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2147
2148 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2149 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2150
2151 // The consecutive indices need to specify an element, not part of two
2152 // different elements. So abandon ship early if this isn't the case.
2153 if (N->getMaskElt(0) % EltSize != 0)
2154 return false;
2155
2156 // This is a splat operation if each element of the permute is the same, and
2157 // if the value doesn't reference the second vector.
2158 unsigned ElementBase = N->getMaskElt(0);
2159
2160 // FIXME: Handle UNDEF elements too!
2161 if (ElementBase >= 16)
2162 return false;
2163
2164 // Check that the indices are consecutive, in the case of a multi-byte element
2165 // splatted with a v16i8 mask.
2166 for (unsigned i = 1; i != EltSize; ++i)
2167 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2168 return false;
2169
2170 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2171 if (N->getMaskElt(i) < 0) continue;
2172 for (unsigned j = 0; j != EltSize; ++j)
2173 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2174 return false;
2175 }
2176 return true;
2177}
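// Illustrative example (editorial addition, not in the original source):
// with EltSize == 4, the byte mask <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>
// splats word element 1 of the first input and is accepted, while
// <4,5,6,7, 8,9,10,11, 4,5,6,7, 4,5,6,7> is rejected because the second
// group does not repeat the first.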
2178
2179/// Check that the mask is shuffling N byte elements. Within each N byte
2180/// element of the mask, the indices could be either in increasing or
2181/// decreasing order as long as they are consecutive.
2182/// \param[in] N the shuffle vector SD Node to analyze
2183/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2184/// Word/DoubleWord/QuadWord).
2185/// \param[in] StepLen the step between adjacent indices within each N byte
2186/// element: 1 if the indices are increasing, -1 if they are decreasing.
2187/// \return true iff the mask is shuffling N byte elements.
2188static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2189 int StepLen) {
2190 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2191 "Unexpected element width.");
2192 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2193
2194 unsigned NumOfElem = 16 / Width;
2195 unsigned MaskVal[16]; // Width is never greater than 16
2196 for (unsigned i = 0; i < NumOfElem; ++i) {
2197 MaskVal[0] = N->getMaskElt(i * Width);
2198 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2199 return false;
2200 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2201 return false;
2202 }
2203
2204 for (unsigned int j = 1; j < Width; ++j) {
2205 MaskVal[j] = N->getMaskElt(i * Width + j);
2206 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2207 return false;
2208 }
2209 }
2210 }
2211
2212 return true;
2213}
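// Illustrative example (editorial addition, not in the original source):
// with Width == 4 and StepLen == 1, the mask <8,9,10,11, 0,1,2,3,
// 20,21,22,23, 4,5,6,7> is accepted: every word-sized group starts at a
// multiple of 4 and its bytes increase by 1. With StepLen == -1 the bytes
// inside each group must instead decrease, e.g. <11,10,9,8, ...>.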
2214
2215bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2216 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2217 if (!isNByteElemShuffleMask(N, 4, 1))
2218 return false;
2219
2220 // Now we look at mask elements 0,4,8,12
2221 unsigned M0 = N->getMaskElt(0) / 4;
2222 unsigned M1 = N->getMaskElt(4) / 4;
2223 unsigned M2 = N->getMaskElt(8) / 4;
2224 unsigned M3 = N->getMaskElt(12) / 4;
2225 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2226 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2227
2228 // Below, let H and L be arbitrary elements of the shuffle mask
2229 // where H is in the range [4,7] and L is in the range [0,3].
2230 // H, 1, 2, 3 or L, 5, 6, 7
2231 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2232 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2233 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2234 InsertAtByte = IsLE ? 12 : 0;
2235 Swap = M0 < 4;
2236 return true;
2237 }
2238 // 0, H, 2, 3 or 4, L, 6, 7
2239 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2240 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2241 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2242 InsertAtByte = IsLE ? 8 : 4;
2243 Swap = M1 < 4;
2244 return true;
2245 }
2246 // 0, 1, H, 3 or 4, 5, L, 7
2247 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2248 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2249 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2250 InsertAtByte = IsLE ? 4 : 8;
2251 Swap = M2 < 4;
2252 return true;
2253 }
2254 // 0, 1, 2, H or 4, 5, 6, L
2255 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2256 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2257 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2258 InsertAtByte = IsLE ? 0 : 12;
2259 Swap = M3 < 4;
2260 return true;
2261 }
2262
2263 // If both vector operands for the shuffle are the same vector, the mask will
2264 // contain only elements from the first one and the second one will be undef.
2265 if (N->getOperand(1).isUndef()) {
2266 ShiftElts = 0;
2267 Swap = true;
2268 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2269 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2270 InsertAtByte = IsLE ? 12 : 0;
2271 return true;
2272 }
2273 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2274 InsertAtByte = IsLE ? 8 : 4;
2275 return true;
2276 }
2277 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2278 InsertAtByte = IsLE ? 4 : 8;
2279 return true;
2280 }
2281 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2282 InsertAtByte = IsLE ? 0 : 12;
2283 return true;
2284 }
2285 }
2286
2287 return false;
2288}
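// Illustrative example (editorial addition, not in the original source):
// viewed as words, the mask {5, 1, 2, 3} (a word from the second input
// inserted at word 0) matches the first pattern above: Swap stays false,
// and on little-endian ShiftElts becomes LittleEndianShifts[5 & 0x3] == 1
// with InsertAtByte == 12.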
2289
2290bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2291 bool &Swap, bool IsLE) {
2292 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2293 // Ensure each byte index of the word is consecutive.
2294 if (!isNByteElemShuffleMask(N, 4, 1))
2295 return false;
2296
2297 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2298 unsigned M0 = N->getMaskElt(0) / 4;
2299 unsigned M1 = N->getMaskElt(4) / 4;
2300 unsigned M2 = N->getMaskElt(8) / 4;
2301 unsigned M3 = N->getMaskElt(12) / 4;
2302
2303 // If both vector operands for the shuffle are the same vector, the mask will
2304 // contain only elements from the first one and the second one will be undef.
2305 if (N->getOperand(1).isUndef()) {
2306 assert(M0 < 4 && "Indexing into an undef vector?");
2307 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2308 return false;
2309
2310 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2311 Swap = false;
2312 return true;
2313 }
2314
2315 // Ensure each word index of the ShuffleVector Mask is consecutive.
2316 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2317 return false;
2318
2319 if (IsLE) {
2320 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2321 // Input vectors don't need to be swapped if the leading element
2322 // of the result is one of the 3 left elements of the second vector
2323 // (or if there is no shift to be done at all).
2324 Swap = false;
2325 ShiftElts = (8 - M0) % 8;
2326 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2327 // Input vectors need to be swapped if the leading element
2328 // of the result is one of the 3 left elements of the first vector
2329 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2330 Swap = true;
2331 ShiftElts = (4 - M0) % 4;
2332 }
2333
2334 return true;
2335 } else { // BE
2336 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2337 // Input vectors don't need to be swapped if the leading element
2338 // of the result is one of the 4 elements of the first vector.
2339 Swap = false;
2340 ShiftElts = M0;
2341 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2342 // Input vectors need to be swapped if the leading element
2343 // of the result is one of the 4 elements of the right vector.
2344 Swap = true;
2345 ShiftElts = M0 - 4;
2346 }
2347
2348 return true;
2349 }
2350}
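// Illustrative example (editorial addition, not in the original source):
// with a single input (operand 1 undef) and word mask {1, 2, 3, 0}, the
// words are consecutive modulo 4, so on big-endian ShiftElts == M0 == 1 and
// on little-endian ShiftElts == (4 - 1) % 4 == 3, with Swap == false.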
2351
2352static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2353 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2354
2355 if (!isNByteElemShuffleMask(N, Width, -1))
2356 return false;
2357
2358 for (int i = 0; i < 16; i += Width)
2359 if (N->getMaskElt(i) != i + Width - 1)
2360 return false;
2361
2362 return true;
2363}
2364
2365bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2366 return isXXBRShuffleMaskHelper(N, 2);
2367}
2368
2369bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2370 return isXXBRShuffleMaskHelper(N, 4);
2371}
2372
2373bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2374 return isXXBRShuffleMaskHelper(N, 8);
2375}
2376
2377bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2378 return isXXBRShuffleMaskHelper(N, 16);
2379}
2380
2381/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2382/// if the inputs to the instruction should be swapped and set \p DM to the
2383/// value for the immediate.
2384/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2385/// AND element 0 of the result comes from the first input (LE) or second input
2386/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2387/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2388/// mask.
2389bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2390 bool &Swap, bool IsLE) {
2391 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2392
2393 // Ensure each byte index of the double word is consecutive.
2394 if (!isNByteElemShuffleMask(N, 8, 1))
2395 return false;
2396
2397 unsigned M0 = N->getMaskElt(0) / 8;
2398 unsigned M1 = N->getMaskElt(8) / 8;
2399 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2400
2401 // If both vector operands for the shuffle are the same vector, the mask will
2402 // contain only elements from the first one and the second one will be undef.
2403 if (N->getOperand(1).isUndef()) {
2404 if ((M0 | M1) < 2) {
2405 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2406 Swap = false;
2407 return true;
2408 } else
2409 return false;
2410 }
2411
2412 if (IsLE) {
2413 if (M0 > 1 && M1 < 2) {
2414 Swap = false;
2415 } else if (M0 < 2 && M1 > 1) {
2416 M0 = (M0 + 2) % 4;
2417 M1 = (M1 + 2) % 4;
2418 Swap = true;
2419 } else
2420 return false;
2421
2422 // Note: if control flow comes here that means Swap is already set above
2423 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2424 return true;
2425 } else { // BE
2426 if (M0 < 2 && M1 > 1) {
2427 Swap = false;
2428 } else if (M0 > 1 && M1 < 2) {
2429 M0 = (M0 + 2) % 4;
2430 M1 = (M1 + 2) % 4;
2431 Swap = true;
2432 } else
2433 return false;
2434
2435 // Note: if control flow comes here that means Swap is already set above
2436 DM = (M0 << 1) + (M1 & 1);
2437 return true;
2438 }
2439}
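// Illustrative example (editorial addition, not in the original source):
// for a big-endian two-input shuffle whose doubleword mask is {0, 3}
// (element 0 of the first input, element 1 of the second), M0 < 2 and
// M1 > 1, so Swap == false and DM == (0 << 1) + (3 & 1) == 1, the
// immediate used by xxpermdi.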
2440
2441
2442/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2443/// appropriate for PPC mnemonics (which have a big endian bias - namely
2444/// elements are counted from the left of the vector register).
2445unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2446 SelectionDAG &DAG) {
2447 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2448 assert(isSplatShuffleMask(SVOp, EltSize));
2449 EVT VT = SVOp->getValueType(0);
2450
2451 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2452 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2453 : SVOp->getMaskElt(0);
2454
2455 if (DAG.getDataLayout().isLittleEndian())
2456 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2457 else
2458 return SVOp->getMaskElt(0) / EltSize;
2459}
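// Illustrative example (editorial addition, not in the original source):
// for the EltSize == 4 splat mask <4,5,6,7, ...> (word element 1), a
// big-endian target returns 4 / 4 == 1, while a little-endian target
// returns (16 / 4) - 1 - 1 == 2, matching the left-to-right element
// numbering expected by vspltw.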
2460
2461/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2462/// by using a vspltis[bhw] instruction of the specified element size, return
2463/// the constant being splatted. The ByteSize field indicates the number of
2464/// bytes of each element [124] -> [bhw].
2465SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2466 SDValue OpVal;
2467
2468 // If ByteSize of the splat is bigger than the element size of the
2469 // build_vector, then we have a case where we are checking for a splat where
2470 // multiple elements of the buildvector are folded together into a single
2471 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2472 unsigned EltSize = 16/N->getNumOperands();
2473 if (EltSize < ByteSize) {
2474 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2475 SDValue UniquedVals[4];
2476 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2477
2478 // See if all of the elements in the buildvector agree across.
2479 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2480 if (N->getOperand(i).isUndef()) continue;
2481 // If the element isn't a constant, bail fully out.
2482 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2483
2484 if (!UniquedVals[i&(Multiple-1)].getNode())
2485 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2486 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2487 return SDValue(); // no match.
2488 }
2489
2490 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2491 // either constant or undef values that are identical for each chunk. See
2492 // if these chunks can form into a larger vspltis*.
2493
2494 // Check to see if all of the leading entries are either 0 or -1. If
2495 // neither, then this won't fit into the immediate field.
2496 bool LeadingZero = true;
2497 bool LeadingOnes = true;
2498 for (unsigned i = 0; i != Multiple-1; ++i) {
2499 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2500
2501 LeadingZero &= isNullConstant(UniquedVals[i]);
2502 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2503 }
2504 // Finally, check the least significant entry.
2505 if (LeadingZero) {
2506 if (!UniquedVals[Multiple-1].getNode())
2507 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2508 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2509 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2510 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2511 }
2512 if (LeadingOnes) {
2513 if (!UniquedVals[Multiple-1].getNode())
2514 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2515 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2516 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2517 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2518 }
2519
2520 return SDValue();
2521 }
2522
2523 // Check to see if this buildvec has a single non-undef value in its elements.
2524 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2525 if (N->getOperand(i).isUndef()) continue;
2526 if (!OpVal.getNode())
2527 OpVal = N->getOperand(i);
2528 else if (OpVal != N->getOperand(i))
2529 return SDValue();
2530 }
2531
2532 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2533
2534 unsigned ValSizeInBytes = EltSize;
2535 uint64_t Value = 0;
2536 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2537 Value = CN->getZExtValue();
2538 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2539 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2540 Value = FloatToBits(CN->getValueAPF().convertToFloat());
2541 }
2542
2543 // If the splat value is larger than the element value, then we can never do
2544 // this splat. The only case that we could fit the replicated bits into our
2545 // immediate field for would be zero, and we prefer to use vxor for it.
2546 if (ValSizeInBytes < ByteSize) return SDValue();
2547
2548 // If the element value is larger than the splat value, check if it consists
2549 // of a repeated bit pattern of size ByteSize.
2550 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2551 return SDValue();
2552
2553 // Properly sign extend the value.
2554 int MaskVal = SignExtend32(Value, ByteSize * 8);
2555
2556 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2557 if (MaskVal == 0) return SDValue();
2558
2559 // Finally, if this value fits in a 5 bit sext field, return it
2560 if (SignExtend32<5>(MaskVal) == MaskVal)
2561 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2562 return SDValue();
2563}
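// Illustrative example (editorial addition, not in the original source):
// a v4i32 BUILD_VECTOR splatting the constant -4 is returned as the target
// constant -4 for ByteSize == 4 (vspltisw -4), since -4 fits the 5-bit
// signed immediate; a splat of 100 is rejected because
// SignExtend32<5>(100) == 4 != 100.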
2564
2565//===----------------------------------------------------------------------===//
2566// Addressing Mode Selection
2567//===----------------------------------------------------------------------===//
2568
2569/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2570/// or 64-bit immediate, and if the value can be accurately represented as a
2571/// sign extension from a 16-bit value. If so, this returns true and the
2572/// immediate.
2573bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2574 if (!isa<ConstantSDNode>(N))
2575 return false;
2576
2577 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2578 if (N->getValueType(0) == MVT::i32)
2579 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2580 else
2581 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2582}
2583bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2584 return isIntS16Immediate(Op.getNode(), Imm);
2585}
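// Illustrative example (editorial addition, not in the original source):
// a ConstantSDNode holding 32760 yields Imm == 32760 and returns true,
// while 40000 sets Imm to (int16_t)40000 == -25536, which no longer
// compares equal to the original value, so the function returns false.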
2586
2587/// Used when computing address flags for selecting loads and stores.
2588/// If we have an OR, check if the LHS and RHS are provably disjoint.
2589/// An OR of two provably disjoint values is equivalent to an ADD.
2590/// Most PPC load/store instructions compute the effective address as a sum,
2591/// so doing this conversion is useful.
2592static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2593 if (N.getOpcode() != ISD::OR)
2594 return false;
2595 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2596 if (!LHSKnown.Zero.getBoolValue())
2597 return false;
2598 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2599 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2600}
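// Illustrative example (editorial addition, not in the original source):
// for (or (shl X, 4), 3), the low four bits of the LHS are known zero and
// all bits of the RHS above bit 1 are known zero, so every bit position is
// known zero on at least one side and the OR can be treated as an ADD.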
2601
2602/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2603/// be represented as an indexed [r+r] operation.
2604bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2605 SDValue &Index,
2606 SelectionDAG &DAG) const {
2607 for (SDNode *U : N->uses()) {
2608 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2609 if (Memop->getMemoryVT() == MVT::f64) {
2610 Base = N.getOperand(0);
2611 Index = N.getOperand(1);
2612 return true;
2613 }
2614 }
2615 }
2616 return false;
2617}
2618
2619/// isIntS34Immediate - This method tests if the value of the given node can be
2620/// accurately represented as a sign extension from a 34-bit value. If so,
2621/// this returns true and the immediate.
2622bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2623 if (!isa<ConstantSDNode>(N))
2624 return false;
2625
2626 Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2627 return isInt<34>(Imm);
2628}
2629bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2630 return isIntS34Immediate(Op.getNode(), Imm);
2631}
2632
2633/// SelectAddressRegReg - Given the specified address, check to see if it
2634/// can be represented as an indexed [r+r] operation. Returns false if it
2635/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2636/// non-zero and N can be represented by a base register plus a signed 16-bit
2637/// displacement, make a more precise judgement by checking (displacement % \p
2638/// EncodingAlignment).
2639bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2640 SDValue &Index, SelectionDAG &DAG,
2641 MaybeAlign EncodingAlignment) const {
2642 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2643 // a [pc+imm].
2644 if (SelectAddressPCRel(N, Base))
2645 return false;
2646
2647 int16_t Imm = 0;
2648 if (N.getOpcode() == ISD::ADD) {
2649 // SPE load/store (f64) can only handle an 8-bit offset, so it cannot
2650 // use a 16-bit displacement; check for the [r+r] EVX form first.
2651 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2652 return true;
2653 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2654 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2655 return false; // r+i
2656 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2657 return false; // r+i
2658
2659 Base = N.getOperand(0);
2660 Index = N.getOperand(1);
2661 return true;
2662 } else if (N.getOpcode() == ISD::OR) {
2663 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2664 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2665 return false; // r+i can fold it if we can.
2666
2667 // If this is an or of disjoint bitfields, we can codegen this as an add
2668 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2669 // disjoint.
2670 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2671
2672 if (LHSKnown.Zero.getBoolValue()) {
2673 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2674 // If all of the bits are known zero on the LHS or RHS, the add won't
2675 // carry.
2676 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2677 Base = N.getOperand(0);
2678 Index = N.getOperand(1);
2679 return true;
2680 }
2681 }
2682 }
2683
2684 return false;
2685}
2686
2687// If we happen to be doing an i64 load or store into a stack slot that has
2688// less than a 4-byte alignment, then the frame-index elimination may need to
2689// use an indexed load or store instruction (because the offset may not be a
2690// multiple of 4). The extra register needed to hold the offset comes from the
2691// register scavenger, and it is possible that the scavenger will need to use
2692// an emergency spill slot. As a result, we need to make sure that a spill slot
2693// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2694// stack slot.
2695static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2696 // FIXME: This does not handle the LWA case.
2697 if (VT != MVT::i64)
2698 return;
2699
2700 // NOTE: We'll exclude negative FIs here, which come from argument
2701 // lowering, because there are no known test cases triggering this problem
2702 // using packed structures (or similar). We can remove this exclusion if
2703 // we find such a test case. The reason why this is so test-case driven is
2704 // because this entire 'fixup' is only to prevent crashes (from the
2705 // register scavenger) on not-really-valid inputs. For example, if we have:
2706 // %a = alloca i1
2707 // %b = bitcast i1* %a to i64*
2708 // store i64 %v, i64* %b
2709 // then the store should really be marked as 'align 1', but is not. If it
2710 // were marked as 'align 1' then the indexed form would have been
2711 // instruction-selected initially, and the problem this 'fixup' is preventing
2712 // won't happen regardless.
2713 if (FrameIdx < 0)
2714 return;
2715
2716 MachineFunction &MF = DAG.getMachineFunction();
2717 MachineFrameInfo &MFI = MF.getFrameInfo();
2718
2719 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2720 return;
2721
2722 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2723 FuncInfo->setHasNonRISpills();
2724}
2725
2726/// Returns true if the address N can be represented by a base register plus
2727/// a signed 16-bit displacement [r+imm], and if it is not better
2728/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2729/// displacements that are multiples of that value.
2730bool PPCTargetLowering::SelectAddressRegImm(
2731 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2732 MaybeAlign EncodingAlignment) const {
2733 // FIXME dl should come from parent load or store, not from address
2734 SDLoc dl(N);
2735
2736 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2737 // a [pc+imm].
2738 if (SelectAddressPCRel(N, Base))
2739 return false;
2740
2741 // If this can be more profitably realized as r+r, fail.
2742 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2743 return false;
2744
2745 if (N.getOpcode() == ISD::ADD) {
2746 int16_t imm = 0;
2747 if (isIntS16Immediate(N.getOperand(1), imm) &&
2748 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2749 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2750 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2751 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2752 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2753 } else {
2754 Base = N.getOperand(0);
2755 }
2756 return true; // [r+i]
2757 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2758 // Match LOAD (ADD (X, Lo(G))).
2759 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2760 && "Cannot handle constant offsets yet!");
2761 Disp = N.getOperand(1).getOperand(0); // The global address.
2762 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2763 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2764 Disp.getOpcode() == ISD::TargetConstantPool ||
2765 Disp.getOpcode() == ISD::TargetJumpTable);
2766 Base = N.getOperand(0);
2767 return true; // [&g+r]
2768 }
2769 } else if (N.getOpcode() == ISD::OR) {
2770 int16_t imm = 0;
2771 if (isIntS16Immediate(N.getOperand(1), imm) &&
2772 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2773 // If this is an or of disjoint bitfields, we can codegen this as an add
2774 // (for better address arithmetic) if the LHS and RHS of the OR are
2775 // provably disjoint.
2776 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2777
2778 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2779 // If all of the bits are known zero on the LHS or RHS, the add won't
2780 // carry.
2781 if (FrameIndexSDNode *FI =
2782 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2783 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2784 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2785 } else {
2786 Base = N.getOperand(0);
2787 }
2788 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2789 return true;
2790 }
2791 }
2792 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2793 // Loading from a constant address.
2794
2795 // If this address fits entirely in a 16-bit sext immediate field, codegen
2796 // this as "d, 0"
2797 int16_t Imm;
2798 if (isIntS16Immediate(CN, Imm) &&
2799 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2800 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2801 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2802 CN->getValueType(0));
2803 return true;
2804 }
2805
2806 // Handle 32-bit sext immediates with LIS + addr mode.
2807 if ((CN->getValueType(0) == MVT::i32 ||
2808 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2809 (!EncodingAlignment ||
2810 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2811 int Addr = (int)CN->getZExtValue();
2812
2813 // Otherwise, break this down into an LIS + disp.
2814 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2815
2816 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2817 MVT::i32);
2818 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2819 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2820 return true;
2821 }
2822 }
2823
2824 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2825 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2826 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2827 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2828 } else
2829 Base = N;
2830 return true; // [r+0]
2831}
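// Illustrative example (editorial addition, not in the original source):
// for the address (add X, 20) with EncodingAlignment == 4, the [r+r] check
// above fails (the displacement is preferred), 20 is a multiple of 4, and
// this returns Disp == 20 with Base == X, suitable for a D-form or DS-form
// access such as lwz/ld.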
2832
2833/// Similar to the 16-bit case but for instructions that take a 34-bit
2834/// displacement field (prefixed loads/stores).
2835bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2836 SDValue &Base,
2837 SelectionDAG &DAG) const {
2838 // Only on 64-bit targets.
2839 if (N.getValueType() != MVT::i64)
2840 return false;
2841
2842 SDLoc dl(N);
2843 int64_t Imm = 0;
2844
2845 if (N.getOpcode() == ISD::ADD) {
2846 if (!isIntS34Immediate(N.getOperand(1), Imm))
2847 return false;
2848 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2849 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2850 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2851 else
2852 Base = N.getOperand(0);
2853 return true;
2854 }
2855
2856 if (N.getOpcode() == ISD::OR) {
2857 if (!isIntS34Immediate(N.getOperand(1), Imm))
2858 return false;
2859 // If this is an or of disjoint bitfields, we can codegen this as an add
2860 // (for better address arithmetic) if the LHS and RHS of the OR are
2861 // provably disjoint.
2862 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2863 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2864 return false;
2865 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2866 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2867 else
2868 Base = N.getOperand(0);
2869 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2870 return true;
2871 }
2872
2873 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2874 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2875 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2876 return true;
2877 }
2878
2879 return false;
2880}
2881
2882/// SelectAddressRegRegOnly - Given the specified address, force it to be
2883/// represented as an indexed [r+r] operation.
2884bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2885 SDValue &Index,
2886 SelectionDAG &DAG) const {
2887 // Check to see if we can easily represent this as an [r+r] address. This
2888 // will fail if it thinks that the address is more profitably represented as
2889 // reg+imm, e.g. where imm = 0.
2890 if (SelectAddressRegReg(N, Base, Index, DAG))
2891 return true;
2892
2893 // If the address is the result of an add, we will utilize the fact that the
2894 // address calculation includes an implicit add. However, we can reduce
2895 // register pressure if we do not materialize a constant just for use as the
2896 // index register. We therefore only fold the add when it is not simply a
2897 // value plus a 16-bit signed constant with both operands having a single use.
2898 int16_t imm = 0;
2899 if (N.getOpcode() == ISD::ADD &&
2900 (!isIntS16Immediate(N.getOperand(1), imm) ||
2901 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2902 Base = N.getOperand(0);
2903 Index = N.getOperand(1);
2904 return true;
2905 }
2906
2907 // Otherwise, do it the hard way, using R0 as the base register.
2908 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2909 N.getValueType());
2910 Index = N;
2911 return true;
2912}
2913
2914template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2915 Ty *PCRelCand = dyn_cast<Ty>(N);
2916 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2917}
2918
2919/// Returns true if this address is a PC Relative address.
2920/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2921/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2922bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2923 // This is a materialize PC Relative node. Always select this as PC Relative.
2924 Base = N;
2925 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2926 return true;
2927 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2928 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2929 isValidPCRelNode<JumpTableSDNode>(N) ||
2930 isValidPCRelNode<BlockAddressSDNode>(N))
2931 return true;
2932 return false;
2933}
2934
2935/// Returns true if we should use a direct load into vector instruction
2936/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2937static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2938
2939 // If there are any uses other than scalar to vector, then we should
2940 // keep it as a scalar load -> direct move pattern to prevent multiple
2941 // loads.
2942 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2943 if (!LD)
2944 return false;
2945
2946 EVT MemVT = LD->getMemoryVT();
2947 if (!MemVT.isSimple())
2948 return false;
2949 switch(MemVT.getSimpleVT().SimpleTy) {
2950 case MVT::i64:
2951 break;
2952 case MVT::i32:
2953 if (!ST.hasP8Vector())
2954 return false;
2955 break;
2956 case MVT::i16:
2957 case MVT::i8:
2958 if (!ST.hasP9Vector())
2959 return false;
2960 break;
2961 default:
2962 return false;
2963 }
2964
2965 SDValue LoadedVal(N, 0);
2966 if (!LoadedVal.hasOneUse())
2967 return false;
2968
2969 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2970 UI != UE; ++UI)
2971 if (UI.getUse().get().getResNo() == 0 &&
2972 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2973 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2974 return false;
2975
2976 return true;
2977}
2978
2979/// getPreIndexedAddressParts - returns true by value, base pointer and
2980/// offset pointer and addressing mode by reference if the node's address
2981/// can be legally represented as pre-indexed load / store address.
2982bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2983 SDValue &Offset,
2984 ISD::MemIndexedMode &AM,
2985 SelectionDAG &DAG) const {
2986 if (DisablePPCPreinc) return false;
2987
2988 bool isLoad = true;
2989 SDValue Ptr;
2990 EVT VT;
2991 Align Alignment;
2992 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2993 Ptr = LD->getBasePtr();
2994 VT = LD->getMemoryVT();
2995 Alignment = LD->getAlign();
2996 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2997 Ptr = ST->getBasePtr();
2998 VT = ST->getMemoryVT();
2999 Alignment = ST->getAlign();
3000 isLoad = false;
3001 } else
3002 return false;
3003
3004 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3005 // instructions because we can fold these into a more efficient instruction
3006 // instead (such as LXSD).
3007 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3008 return false;
3009 }
3010
3011 // PowerPC doesn't have preinc load/store instructions for vectors
3012 if (VT.isVector())
3013 return false;
3014
3015 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3016 // Common code will reject creating a pre-inc form if the base pointer
3017 // is a frame index, or if N is a store and the base pointer is either
3018 // the same as or a predecessor of the value being stored. Check for
3019 // those situations here, and try with swapped Base/Offset instead.
3020 bool Swap = false;
3021
3022 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3023 Swap = true;
3024 else if (!isLoad) {
3025 SDValue Val = cast<StoreSDNode>(N)->getValue();
3026 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3027 Swap = true;
3028 }
3029
3030 if (Swap)
3031 std::swap(Base, Offset);
3032
3033 AM = ISD::PRE_INC;
3034 return true;
3035 }
3036
3037 // LDU/STU can only handle immediates that are a multiple of 4.
3038 if (VT != MVT::i64) {
3039 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
3040 return false;
3041 } else {
3042 // LDU/STU need an address with at least 4-byte alignment.
3043 if (Alignment < Align(4))
3044 return false;
3045
3046 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3047 return false;
3048 }
3049
3050 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3051 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3052 // sext i32 to i64 when addr mode is r+i.
3053 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3054 LD->getExtensionType() == ISD::SEXTLOAD &&
3055 isa<ConstantSDNode>(Offset))
3056 return false;
3057 }
3058
3059 AM = ISD::PRE_INC;
3060 return true;
3061}
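// Illustrative example (editorial addition, not in the original source):
// an i32 load whose address is (add r30, 16) can be selected as a
// pre-increment load: Base becomes r30, Offset the constant 16, and
// AM == ISD::PRE_INC, which later maps onto an update-form instruction
// such as lwzu r?, 16(r30).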
3062
3063//===----------------------------------------------------------------------===//
3064// LowerOperation implementation
3065//===----------------------------------------------------------------------===//
3066
3067/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3068/// and LoOpFlags to the target MO flags.
3069static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3070 unsigned &HiOpFlags, unsigned &LoOpFlags,
3071 const GlobalValue *GV = nullptr) {
3072 HiOpFlags = PPCII::MO_HA;
3073 LoOpFlags = PPCII::MO_LO;
3074
3075 // Don't use the pic base if not in PIC relocation model.
3076 if (IsPIC) {
3077 HiOpFlags |= PPCII::MO_PIC_FLAG;
3078 LoOpFlags |= PPCII::MO_PIC_FLAG;
3079 }
3080}
3081
3082static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3083 SelectionDAG &DAG) {
3084 SDLoc DL(HiPart);
3085 EVT PtrVT = HiPart.getValueType();
3086 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3087
3088 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3089 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3090
3091 // With PIC, the first instruction is actually "GR+hi(&G)".
3092 if (isPIC)
3093 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3094 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3095
3096 // Generate non-pic code that has direct accesses to the constant pool.
3097 // The address of the global is just (hi(&g)+lo(&g)).
3098 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3099}
3100
3101static void setUsesTOCBasePtr(MachineFunction &MF) {
3102 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3103 FuncInfo->setUsesTOCBasePtr();
3104}
3105
3106static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3107 setUsesTOCBasePtr(DAG.getMachineFunction());
3108}
3109
3110SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3111 SDValue GA) const {
3112 const bool Is64Bit = Subtarget.isPPC64();
3113 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3114 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3115 : Subtarget.isAIXABI()
3116 ? DAG.getRegister(PPC::R2, VT)
3117 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3118 SDValue Ops[] = { GA, Reg };
3119 return DAG.getMemIntrinsicNode(
3120 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3121 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
3122 MachineMemOperand::MOLoad);
3123}
3124
3125SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3126 SelectionDAG &DAG) const {
3127 EVT PtrVT = Op.getValueType();
3128 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3129 const Constant *C = CP->getConstVal();
3130
3131 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3132 // The actual address of the GlobalValue is stored in the TOC.
3133 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3134 if (Subtarget.isUsingPCRelativeCalls()) {
3135 SDLoc DL(CP);
3136 EVT Ty = getPointerTy(DAG.getDataLayout());
3137 SDValue ConstPool = DAG.getTargetConstantPool(
3138 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3139 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3140 }
3141 setUsesTOCBasePtr(DAG);
3142 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3143 return getTOCEntry(DAG, SDLoc(CP), GA);
3144 }
3145
3146 unsigned MOHiFlag, MOLoFlag;
3147 bool IsPIC = isPositionIndependent();
3148 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3149
3150 if (IsPIC && Subtarget.isSVR4ABI()) {
3151 SDValue GA =
3152 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3153 return getTOCEntry(DAG, SDLoc(CP), GA);
3154 }
3155
3156 SDValue CPIHi =
3157 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3158 SDValue CPILo =
3159 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3160 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3161}
3162
3163// For 64-bit PowerPC, prefer the more compact relative encodings.
3164// This trades 32 bits per jump table entry for one or two instructions
3165// on the jump site.
3166unsigned PPCTargetLowering::getJumpTableEncoding() const {
3167 if (isJumpTableRelative())
3168 return MachineJumpTableInfo::EK_LabelDifference32;
3169
3170 return TargetLowering::getJumpTableEncoding();
3171}
3172
3173bool PPCTargetLowering::isJumpTableRelative() const {
3174 if (UseAbsoluteJumpTables)
3175 return false;
3176 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3177 return true;
3178 return TargetLowering::isJumpTableRelative();
3179}
3180
3181SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3182 SelectionDAG &DAG) const {
3183 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3184 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3185
3186 switch (getTargetMachine().getCodeModel()) {
3187 case CodeModel::Small:
3188 case CodeModel::Medium:
3189 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3190 default:
3191 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3192 getPointerTy(DAG.getDataLayout()));
3193 }
3194}
3195
3196const MCExpr *
3197PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3198 unsigned JTI,
3199 MCContext &Ctx) const {
3200 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3201 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3202
3203 switch (getTargetMachine().getCodeModel()) {
3204 case CodeModel::Small:
3205 case CodeModel::Medium:
3206 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3207 default:
3208 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3209 }
3210}
3211
3212SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3213 EVT PtrVT = Op.getValueType();
3214 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3215
3216 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3217 if (Subtarget.isUsingPCRelativeCalls()) {
3218 SDLoc DL(JT);
3219 EVT Ty = getPointerTy(DAG.getDataLayout());
3220 SDValue GA =
3221 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3222 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3223 return MatAddr;
3224 }
3225
3226 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3227 // The actual address of the GlobalValue is stored in the TOC.
3228 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3229 setUsesTOCBasePtr(DAG);
3230 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3231 return getTOCEntry(DAG, SDLoc(JT), GA);
3232 }
3233
3234 unsigned MOHiFlag, MOLoFlag;
3235 bool IsPIC = isPositionIndependent();
3236 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3237
3238 if (IsPIC && Subtarget.isSVR4ABI()) {
3239 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3240 PPCII::MO_PIC_FLAG);
3241 return getTOCEntry(DAG, SDLoc(GA), GA);
3242 }
3243
3244 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3245 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3246 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3247}
3248
3249SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3250 SelectionDAG &DAG) const {
3251 EVT PtrVT = Op.getValueType();
3252 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3253 const BlockAddress *BA = BASDN->getBlockAddress();
3254
3255 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3256 if (Subtarget.isUsingPCRelativeCalls()) {
3257 SDLoc DL(BASDN);
3258 EVT Ty = getPointerTy(DAG.getDataLayout());
3259 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3260 PPCII::MO_PCREL_FLAG);
3261 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3262 return MatAddr;
3263 }
3264
3265 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3266 // The actual BlockAddress is stored in the TOC.
3267 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3268 setUsesTOCBasePtr(DAG);
3269 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3270 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3271 }
3272
3273 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3274 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3275 return getTOCEntry(
3276 DAG, SDLoc(BASDN),
3277 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3278
3279 unsigned MOHiFlag, MOLoFlag;
3280 bool IsPIC = isPositionIndependent();
3281 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3282 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3283 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3284 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3285}
3286
3287SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3288 SelectionDAG &DAG) const {
3289 if (Subtarget.isAIXABI())
3290 return LowerGlobalTLSAddressAIX(Op, DAG);
3291
3292 return LowerGlobalTLSAddressLinux(Op, DAG);
3293}
3294
3295SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3296 SelectionDAG &DAG) const {
3297 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3298
3299 if (DAG.getTarget().useEmulatedTLS())
3300 report_fatal_error("Emulated TLS is not yet supported on AIX");
3301
3302 SDLoc dl(GA);
3303 const GlobalValue *GV = GA->getGlobal();
3304 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3305
3306 // The general-dynamic model is the only access model supported for now, so
3307 // all the GlobalTLSAddress nodes are lowered with this model.
3308 // We need to generate two TOC entries, one for the variable offset, one for
3309 // the region handle. The global address for the TOC entry of the region
3310 // handle is created with the MO_TLSGDM_FLAG flag and the global address
3311 // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3312 SDValue VariableOffsetTGA =
3313 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3314 SDValue RegionHandleTGA =
3315 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3316 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3317 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3318 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3319 RegionHandle);
3320}
3321
3322SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3323 SelectionDAG &DAG) const {
3324 // FIXME: TLS addresses currently use medium model code sequences,
3325 // which is the most useful form. Eventually support for small and
3326 // large models could be added if users need it, at the cost of
3327 // additional complexity.
3328 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3329 if (DAG.getTarget().useEmulatedTLS())
3330 return LowerToTLSEmulatedModel(GA, DAG);
3331
3332 SDLoc dl(GA);
3333 const GlobalValue *GV = GA->getGlobal();
3334 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3335 bool is64bit = Subtarget.isPPC64();
3336 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3337 PICLevel::Level picLevel = M->getPICLevel();
3338
3340 TLSModel::Model Model = TM.getTLSModel(GV);
3341
3342 if (Model == TLSModel::LocalExec) {
3343 if (Subtarget.isUsingPCRelativeCalls()) {
3344 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3345 SDValue TGA = DAG.getTargetGlobalAddress(
3346 GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3347 SDValue MatAddr =
3348 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3349 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3350 }
3351
3352 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3353 PPCII::MO_TPREL_HA);
3354 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3355 PPCII::MO_TPREL_LO);
3356 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3357 : DAG.getRegister(PPC::R2, MVT::i32);
3358
3359 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3360 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3361 }
3362
3363 if (Model == TLSModel::InitialExec) {
3364 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3365 SDValue TGA = DAG.getTargetGlobalAddress(
3366 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3367 SDValue TGATLS = DAG.getTargetGlobalAddress(
3368 GV, dl, PtrVT, 0,
3369 IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3370 SDValue TPOffset;
3371 if (IsPCRel) {
3372 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3373 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3374 MachinePointerInfo());
3375 } else {
3376 SDValue GOTPtr;
3377 if (is64bit) {
3378 setUsesTOCBasePtr(DAG);
3379 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3380 GOTPtr =
3381 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3382 } else {
3383 if (!TM.isPositionIndependent())
3384 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3385 else if (picLevel == PICLevel::SmallPIC)
3386 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3387 else
3388 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3389 }
3390 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3391 }
3392 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3393 }
3394
3395 if (Model == TLSModel::GeneralDynamic) {
3396 if (Subtarget.isUsingPCRelativeCalls()) {
3397 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3398 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3399 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3400 }
3401
3402 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3403 SDValue GOTPtr;
3404 if (is64bit) {
3405 setUsesTOCBasePtr(DAG);
3406 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3407 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3408 GOTReg, TGA);
3409 } else {
3410 if (picLevel == PICLevel::SmallPIC)
3411 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3412 else
3413 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3414 }
3415 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3416 GOTPtr, TGA, TGA);
3417 }
3418
3419 if (Model == TLSModel::LocalDynamic) {
3420 if (Subtarget.isUsingPCRelativeCalls()) {
3421 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3422 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3423 SDValue MatPCRel =
3424 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3425 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3426 }
3427
3428 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3429 SDValue GOTPtr;
3430 if (is64bit) {
3431 setUsesTOCBasePtr(DAG);
3432 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3433 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3434 GOTReg, TGA);
3435 } else {
3436 if (picLevel == PICLevel::SmallPIC)
3437 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3438 else
3439 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3440 }
3441 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3442 PtrVT, GOTPtr, TGA, TGA);
3443 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3444 PtrVT, TLSAddr, TGA);
3445 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3446 }
3447
3448 llvm_unreachable("Unknown TLS model!");
3449}
3450
3451SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3452 SelectionDAG &DAG) const {
3453 EVT PtrVT = Op.getValueType();
3454 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3455 SDLoc DL(GSDN);
3456 const GlobalValue *GV = GSDN->getGlobal();
3457
3458 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3459 // The actual address of the GlobalValue is stored in the TOC.
3460 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3461 if (Subtarget.isUsingPCRelativeCalls()) {
3462 EVT Ty = getPointerTy(DAG.getDataLayout());
3463 if (isAccessedAsGotIndirect(Op)) {
3464 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3466 PPCII::MO_GOT_PCREL_FLAG);
3467 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3468 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3469 MachinePointerInfo());
3470 return Load;
3471 } else {
3472 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3473 PPCII::MO_PCREL_FLAG);
3474 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3475 }
3476 }
3477 setUsesTOCBasePtr(DAG);
3478 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3479 return getTOCEntry(DAG, DL, GA);
3480 }
3481
3482 unsigned MOHiFlag, MOLoFlag;
3483 bool IsPIC = isPositionIndependent();
3484 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3485
3486 if (IsPIC && Subtarget.isSVR4ABI()) {
3487 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3488 GSDN->getOffset(),
3489 PPCII::MO_PIC_FLAG);
3490 return getTOCEntry(DAG, DL, GA);
3491 }
3492
3493 SDValue GAHi =
3494 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3495 SDValue GALo =
3496 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3497
3498 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3499}
3500
3501SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3502 bool IsStrict = Op->isStrictFPOpcode();
3503 ISD::CondCode CC =
3504 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3505 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3506 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3507 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3508 EVT LHSVT = LHS.getValueType();
3509 SDLoc dl(Op);
3510
3511 // Soften the setcc with libcall if it is fp128.
3512 if (LHSVT == MVT::f128) {
3513 assert(!Subtarget.hasP9Vector() &&
3514 "SETCC for f128 is already legal under Power9!");
3515 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3516 Op->getOpcode() == ISD::STRICT_FSETCCS);
3517 if (RHS.getNode())
3518 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3519 DAG.getCondCode(CC));
3520 if (IsStrict)
3521 return DAG.getMergeValues({LHS, Chain}, dl);
3522 return LHS;
3523 }
3524
3525 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3526
3527 if (Op.getValueType() == MVT::v2i64) {
3528 // When the operands themselves are v2i64 values, we need to do something
3529 // special because VSX has no underlying comparison operations for these.
3530 if (LHS.getValueType() == MVT::v2i64) {
3531 // Equality can be handled by casting to the legal type for Altivec
3532 // comparisons, everything else needs to be expanded.
3533 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3534 return SDValue();
3535 SDValue SetCC32 = DAG.getSetCC(
3536 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3537 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3538 int ShuffV[] = {1, 0, 3, 2};
3539 SDValue Shuff =
3540 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3541 return DAG.getBitcast(MVT::v2i64,
3542 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3543 dl, MVT::v4i32, Shuff, SetCC32));
3544 }
3545
3546 // We handle most of these in the usual way.
3547 return Op;
3548 }
3549
3550 // If we're comparing for equality to zero, expose the fact that this is
3551 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3552 // fold the new nodes.
3553 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3554 return V;
3555
3556 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3557 // Leave comparisons against 0 and -1 alone for now, since they're usually
3558 // optimized. FIXME: revisit this when we can custom lower all setcc
3559 // optimizations.
3560 if (C->isAllOnes() || C->isZero())
3561 return SDValue();
3562 }
3563
3564 // If we have an integer seteq/setne, turn it into a compare against zero
3565 // by xor'ing the rhs with the lhs, which is faster than setting a
3566 // condition register, reading it back out, and masking the correct bit. The
3567 // normal approach here uses sub to do this instead of xor. Using xor exposes
3568 // the result to other bit-twiddling opportunities.
3569 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3570 EVT VT = Op.getValueType();
3571 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3572 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3573 }
3574 return SDValue();
3575}
3576
3577SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3578 SDNode *Node = Op.getNode();
3579 EVT VT = Node->getValueType(0);
3580 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3581 SDValue InChain = Node->getOperand(0);
3582 SDValue VAListPtr = Node->getOperand(1);
3583 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3584 SDLoc dl(Node);
3585
3586 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3587
3588 // gpr_index
3589 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3590 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3591 InChain = GprIndex.getValue(1);
3592
3593 if (VT == MVT::i64) {
3594 // Check if GprIndex is even
3595 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3596 DAG.getConstant(1, dl, MVT::i32));
3597 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3598 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3599 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3600 DAG.getConstant(1, dl, MVT::i32));
3601 // Align GprIndex to be even if it isn't
3602 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3603 GprIndex);
3604 }
3605
3606 // fpr index is 1 byte after gpr
3607 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3608 DAG.getConstant(1, dl, MVT::i32));
3609
3610 // fpr
3611 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3612 FprPtr, MachinePointerInfo(SV), MVT::i8);
3613 InChain = FprIndex.getValue(1);
3614
3615 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3616 DAG.getConstant(8, dl, MVT::i32));
3617
3618 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3619 DAG.getConstant(4, dl, MVT::i32));
3620
3621 // areas
3622 SDValue OverflowArea =
3623 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3624 InChain = OverflowArea.getValue(1);
3625
3626 SDValue RegSaveArea =
3627 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3628 InChain = RegSaveArea.getValue(1);
3629
3630 // select overflow_area if index >= 8
3631 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3632 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3633
3634 // adjustment constant gpr_index * 4/8
3635 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3636 VT.isInteger() ? GprIndex : FprIndex,
3637 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3638 MVT::i32));
3639
3640 // OurReg = RegSaveArea + RegConstant
3641 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3642 RegConstant);
3643
3644 // Floating types are 32 bytes into RegSaveArea
3645 if (VT.isFloatingPoint())
3646 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3647 DAG.getConstant(32, dl, MVT::i32));
3648
3649 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3650 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3651 VT.isInteger() ? GprIndex : FprIndex,
3652 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3653 MVT::i32));
3654
3655 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3656 VT.isInteger() ? VAListPtr : FprPtr,
3657 MachinePointerInfo(SV), MVT::i8);
3658
3659 // determine if we should load from reg_save_area or overflow_area
3660 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3661
3662 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3663 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3664 DAG.getConstant(VT.isInteger() ? 4 : 8,
3665 dl, MVT::i32));
3666
3667 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3668 OverflowAreaPlusN);
3669
3670 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3671 MachinePointerInfo(SV), MVT::i32);
3672
3673 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3674}
3675
3676SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3677 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3678
3679 // We have to copy the entire va_list struct:
3680 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3681 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3682 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3683 false, true, false, MachinePointerInfo(),
3684 MachinePointerInfo());
3685}
3686
3687SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3688 SelectionDAG &DAG) const {
3689 if (Subtarget.isAIXABI())
3690 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3691
3692 return Op.getOperand(0);
3693}
3694
3695SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3696 MachineFunction &MF = DAG.getMachineFunction();
3697 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3698
3699 assert((Op.getOpcode() == ISD::INLINEASM ||
3700 Op.getOpcode() == ISD::INLINEASM_BR) &&
3701 "Expecting Inline ASM node.");
3702
3703 // If an LR store is already known to be required then there is no point in
3704 // checking this ASM as well.
3705 if (MFI.isLRStoreRequired())
3706 return Op;
3707
3708 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3709 // type MVT::Glue. We want to ignore this last operand if that is the case.
3710 unsigned NumOps = Op.getNumOperands();
3711 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3712 --NumOps;
3713
3714 // Check all operands that may contain the LR.
3715 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3716 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3717 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3718 ++i; // Skip the ID value.
3719
3720 switch (InlineAsm::getKind(Flags)) {
3721 default:
3722 llvm_unreachable("Bad flags!");
3723 case InlineAsm::Kind_RegUse:
3724 case InlineAsm::Kind_Imm:
3725 case InlineAsm::Kind_Mem:
3726 i += NumVals;
3727 break;
3728 case InlineAsm::Kind_Clobber:
3729 case InlineAsm::Kind_RegDef:
3730 case InlineAsm::Kind_RegDefEarlyClobber: {
3731 for (; NumVals; --NumVals, ++i) {
3732 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3733 if (Reg != PPC::LR && Reg != PPC::LR8)
3734 continue;
3735 MFI.setLRStoreRequired();
3736 return Op;
3737 }
3738 break;
3739 }
3740 }
3741 }
3742
3743 return Op;
3744}
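// For example, an asm statement that clobbers the link register, such as
//   asm volatile("bl helper" ::: "lr");
// reaches this lowering with a clobber operand naming LR (LR8 in 64-bit
// mode) and forces the prologue to save LR. (Illustrative snippet only,
// not taken from this file.)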
3745
3746SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3747 SelectionDAG &DAG) const {
3748 if (Subtarget.isAIXABI())
3749 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3750
3751 SDValue Chain = Op.getOperand(0);
3752 SDValue Trmp = Op.getOperand(1); // trampoline
3753 SDValue FPtr = Op.getOperand(2); // nested function
3754 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3755 SDLoc dl(Op);
3756
3757 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3758 bool isPPC64 = (PtrVT == MVT::i64);
3759 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3760
3761 TargetLowering::ArgListTy Args;
3762 TargetLowering::ArgListEntry Entry;
3763
3764 Entry.Ty = IntPtrTy;
3765 Entry.Node = Trmp; Args.push_back(Entry);
3766
3767 // TrampSize == (isPPC64 ? 48 : 40);
3768 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3769 isPPC64 ? MVT::i64 : MVT::i32);
3770 Args.push_back(Entry);
3771
3772 Entry.Node = FPtr; Args.push_back(Entry);
3773 Entry.Node = Nest; Args.push_back(Entry);
3774
3775 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3776 TargetLowering::CallLoweringInfo CLI(DAG);
3777 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3778 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3779 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3780
3781 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3782 return CallResult.second;
3783}
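// The helper is assumed to match compiler-rt's trampoline_setup.c, roughly:
//   void __trampoline_setup(uint32_t *trampOnStack, int trampSizeAllocated,
//                           const void *realFunc, void *localsPtr);
// which lines up with the (Trmp, TrampSize, FPtr, Nest) argument list built
// above.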
3784
3785SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3786 MachineFunction &MF = DAG.getMachineFunction();
3787 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3788 EVT PtrVT = getPointerTy(MF.getDataLayout());
3789
3790 SDLoc dl(Op);
3791
3792 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3793 // vastart just stores the address of the VarArgsFrameIndex slot into the
3794 // memory location argument.
3795 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3796 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3797 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3798 MachinePointerInfo(SV));
3799 }
3800
3801 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3802 // We assume the given va_list is already allocated.
3803 //
3804 // typedef struct {
3805 // char gpr; /* index into the array of 8 GPRs
3806 // * stored in the register save area
3807 // * gpr=0 corresponds to r3,
3808 // * gpr=1 to r4, etc.
3809 // */
3810 // char fpr; /* index into the array of 8 FPRs
3811 // * stored in the register save area
3812 // * fpr=0 corresponds to f1,
3813 // * fpr=1 to f2, etc.
3814 // */
3815 // char *overflow_arg_area;
3816 // /* location on stack that holds
3817 // * the next overflow argument
3818 // */
3819 // char *reg_save_area;
3820 // /* where r3:r10 and f1:f8 (if saved)
3821 // * are stored
3822 // */
3823 // } va_list[1];
3824
3825 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3826 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3827 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3828 PtrVT);
3829 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3830 PtrVT);
3831
3832 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3833 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3834
3835 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3836 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3837
3838 uint64_t FPROffset = 1;
3839 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3840
3841 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3842
3843 // Store first byte : number of int regs
3844 SDValue firstStore =
3845 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3846 MachinePointerInfo(SV), MVT::i8);
3847 uint64_t nextOffset = FPROffset;
3848 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3849 ConstFPROffset);
3850
3851 // Store second byte : number of float regs
3852 SDValue secondStore =
3853 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3854 MachinePointerInfo(SV, nextOffset), MVT::i8);
3855 nextOffset += StackOffset;
3856 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3857
3858 // Store second word : arguments given on stack
3859 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3860 MachinePointerInfo(SV, nextOffset));
3861 nextOffset += FrameOffset;
3862 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3863
3864 // Store third word : arguments given in registers
3865 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3866 MachinePointerInfo(SV, nextOffset));
3867}
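// Taken together, the four stores above amount to this C-level sketch of
// va_start for 32-bit SVR4 (illustrative names; fields follow the struct
// shown earlier):
//   va->gpr = NumFixedGPRsUsed;            // byte at offset 0
//   va->fpr = NumFixedFPRsUsed;            // byte at offset 1
//   va->overflow_arg_area = StackArgBase;  // pointer at offset 4
//   va->reg_save_area = RegSaveFrameObj;   // pointer at offset 8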
3868
3869/// FPR - The set of FP registers that should be allocated for arguments
3870/// on Darwin and AIX.
3871static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3872 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3873 PPC::F11, PPC::F12, PPC::F13};
3874
3875/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3876/// the stack.
3877static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3878 unsigned PtrByteSize) {
3879 unsigned ArgSize = ArgVT.getStoreSize();
3880 if (Flags.isByVal())
3881 ArgSize = Flags.getByValSize();
3882
3883 // Round up to multiples of the pointer size, except for array members,
3884 // which are always packed.
3885 if (!Flags.isInConsecutiveRegs())
3886 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3887
3888 return ArgSize;
3889}
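// For instance, with PtrByteSize == 8 a 13-byte byval argument reserves
// ((13 + 8 - 1) / 8) * 8 == 16 bytes, while a 13-byte member of a split
// array (isInConsecutiveRegs) skips the rounding and stays packed at 13.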
3890
3891/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3892/// on the stack.
3893static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3894 ISD::ArgFlagsTy Flags,
3895 unsigned PtrByteSize) {
3896 Align Alignment(PtrByteSize);
3897
3898 // Altivec parameters are padded to a 16 byte boundary.
3899 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3900 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3901 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3902 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3903 Alignment = Align(16);
3904
3905 // ByVal parameters are aligned as requested.
3906 if (Flags.isByVal()) {
3907 auto BVAlign = Flags.getNonZeroByValAlign();
3908 if (BVAlign > PtrByteSize) {
3909 if (BVAlign.value() % PtrByteSize != 0)
3910 report_fatal_error(
3911 "ByVal alignment is not a multiple of the pointer size");
3912
3913 Alignment = BVAlign;
3914 }
3915 }
3916
3917 // Array members are always packed to their original alignment.
3918 if (Flags.isInConsecutiveRegs()) {
3919 // If the array member was split into multiple registers, the first
3920 // needs to be aligned to the size of the full type. (Except for
3921 // ppcf128, which is only aligned as its f64 components.)
3922 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3923 Alignment = Align(OrigVT.getStoreSize());
3924 else
3925 Alignment = Align(ArgVT.getStoreSize());
3926 }
3927
3928 return Alignment;
3929}
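// Rough example: the first f32 piece of a split homogeneous aggregate whose
// original type is v4f32 (isSplit set) gets Align(16), the store size of
// the full type, while a plain f32 scalar keeps the default Align(PtrByteSize).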
3930
3931/// CalculateStackSlotUsed - Return whether this argument will use its
3932/// stack slot (instead of being passed in registers). ArgOffset,
3933/// AvailableFPRs, and AvailableVRs must hold the current argument
3934/// position, and will be updated to account for this argument.
3935static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3936 unsigned PtrByteSize, unsigned LinkageSize,
3937 unsigned ParamAreaSize, unsigned &ArgOffset,
3938 unsigned &AvailableFPRs,
3939 unsigned &AvailableVRs) {
3940 bool UseMemory = false;
3941
3942 // Respect alignment of argument on the stack.
3943 Align Alignment =
3944 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3945 ArgOffset = alignTo(ArgOffset, Alignment);
3946 // If there's no space left in the argument save area, we must
3947 // use memory (this check also catches zero-sized arguments).
3948 if (ArgOffset >= LinkageSize + ParamAreaSize)
3949 UseMemory = true;
3950
3951 // Allocate argument on the stack.
3952 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3953 if (Flags.isInConsecutiveRegsLast())
3954 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3955 // If we overran the argument save area, we must use memory
3956 // (this check catches arguments passed partially in memory)
3957 if (ArgOffset > LinkageSize + ParamAreaSize)
3958 UseMemory = true;
3959
3960 // However, if the argument is actually passed in an FPR or a VR,
3961 // we don't use memory after all.
3962 if (!Flags.isByVal()) {
3963 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3964 if (AvailableFPRs > 0) {
3965 --AvailableFPRs;
3966 return false;
3967 }
3968 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3969 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3970 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3971 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3972 if (AvailableVRs > 0) {
3973 --AvailableVRs;
3974 return false;
3975 }
3976 }
3977
3978 return UseMemory;
3979}
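// Note the asymmetry above: an f64 that still finds a free FPR returns
// false (no stack slot is loaded from), yet ArgOffset was already advanced,
// because its shadow slot in the parameter save area stays reserved either way.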
3980
3981/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3982/// ensure minimum alignment required for target.
3983static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3984 unsigned NumBytes) {
3985 return alignTo(NumBytes, Lowering->getStackAlign());
3986}
3987
3988SDValue PPCTargetLowering::LowerFormalArguments(
3989 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3990 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3991 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3992 if (Subtarget.isAIXABI())
3993 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3994 InVals);
3995 if (Subtarget.is64BitELFABI())
3996 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3997 InVals);
3998 assert(Subtarget.is32BitELFABI());
3999 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4000 InVals);
4001}
4002
4003SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4004 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4005 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4006 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4007
4008 // 32-bit SVR4 ABI Stack Frame Layout:
4009 // +-----------------------------------+
4010 // +--> | Back chain |
4011 // | +-----------------------------------+
4012 // | | Floating-point register save area |
4013 // | +-----------------------------------+
4014 // | | General register save area |
4015 // | +-----------------------------------+
4016 // | | CR save word |
4017 // | +-----------------------------------+
4018 // | | VRSAVE save word |
4019 // | +-----------------------------------+
4020 // | | Alignment padding |
4021 // | +-----------------------------------+
4022 // | | Vector register save area |
4023 // | +-----------------------------------+
4024 // | | Local variable space |
4025 // | +-----------------------------------+
4026 // | | Parameter list area |
4027 // | +-----------------------------------+
4028 // | | LR save word |
4029 // | +-----------------------------------+
4030 // SP--> +--- | Back chain |
4031 // +-----------------------------------+
4032 //
4033 // Specifications:
4034 // System V Application Binary Interface PowerPC Processor Supplement
4035 // AltiVec Technology Programming Interface Manual
4036
4037 MachineFunction &MF = DAG.getMachineFunction();
4038 MachineFrameInfo &MFI = MF.getFrameInfo();
4039 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4040
4041 EVT PtrVT = getPointerTy(MF.getDataLayout());
4042 // Potential tail calls could cause overwriting of argument stack slots.
4043 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4044 (CallConv == CallingConv::Fast));
4045 const Align PtrAlign(4);
4046
4047 // Assign locations to all of the incoming arguments.
4048 SmallVector<CCValAssign, 16> ArgLocs;
4049 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4050 *DAG.getContext());
4051
4052 // Reserve space for the linkage area on the stack.
4053 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4054 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4055 if (useSoftFloat())
4056 CCInfo.PreAnalyzeFormalArguments(Ins);
4057
4058 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4059 CCInfo.clearWasPPCF128();
4060
4061 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4062 CCValAssign &VA = ArgLocs[i];
4063
4064 // Arguments stored in registers.
4065 if (VA.isRegLoc()) {
4066 const TargetRegisterClass *RC;
4067 EVT ValVT = VA.getValVT();
4068
4069 switch (ValVT.getSimpleVT().SimpleTy) {
4070 default:
4071 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4072 case MVT::i1:
4073 case MVT::i32:
4074 RC = &PPC::GPRCRegClass;
4075 break;
4076 case MVT::f32:
4077 if (Subtarget.hasP8Vector())
4078 RC = &PPC::VSSRCRegClass;
4079 else if (Subtarget.hasSPE())
4080 RC = &PPC::GPRCRegClass;
4081 else
4082 RC = &PPC::F4RCRegClass;
4083 break;
4084 case MVT::f64:
4085 if (Subtarget.hasVSX())
4086 RC = &PPC::VSFRCRegClass;
4087 else if (Subtarget.hasSPE())
4088 // SPE passes doubles in GPR pairs.
4089 RC = &PPC::GPRCRegClass;
4090 else
4091 RC = &PPC::F8RCRegClass;
4092 break;
4093 case MVT::v16i8:
4094 case MVT::v8i16:
4095 case MVT::v4i32:
4096 RC = &PPC::VRRCRegClass;
4097 break;
4098 case MVT::v4f32:
4099 RC = &PPC::VRRCRegClass;
4100 break;
4101 case MVT::v2f64:
4102 case MVT::v2i64:
4103 RC = &PPC::VRRCRegClass;
4104 break;
4105 }
4106
4107 SDValue ArgValue;
4108 // Transform the arguments stored in physical registers into
4109 // virtual ones.
4110 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4111 assert(i + 1 < e && "No second half of double precision argument");
4112 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4113 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4114 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4115 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4116 if (!Subtarget.isLittleEndian())
4117 std::swap (ArgValueLo, ArgValueHi);
4118 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4119 ArgValueHi);
4120 } else {
4121 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4122 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4123 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4124 if (ValVT == MVT::i1)
4125 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4126 }
4127
4128 InVals.push_back(ArgValue);
4129 } else {
4130 // Argument stored in memory.
4131 assert(VA.isMemLoc());
4132
4133 // Get the extended size of the argument type on the stack
4134 unsigned ArgSize = VA.getLocVT().getStoreSize();
4135 // Get the actual size of the argument type
4136 unsigned ObjSize = VA.getValVT().getStoreSize();
4137 unsigned ArgOffset = VA.getLocMemOffset();
4138 // Stack objects in PPC32 are right justified.
4139 ArgOffset += ArgSize - ObjSize;
4140 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4141
4142 // Create load nodes to retrieve arguments from the stack.
4143 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4144 InVals.push_back(
4145 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4146 }
4147 }
4148
4149 // Assign locations to all of the incoming aggregate by value arguments.
4150 // Aggregates passed by value are stored in the local variable space of the
4151 // caller's stack frame, right above the parameter list area.
4152 SmallVector<CCValAssign, 16> ByValArgLocs;
4153 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4154 ByValArgLocs, *DAG.getContext());
4155
4156 // Reserve stack space for the allocations in CCInfo.
4157 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
4158
4159 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4160
4161 // Area that is at least reserved in the caller of this function.
4162 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
4163 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4164
4165 // Set the size that is at least reserved in caller of this function. Tail
4166 // call optimized function's reserved stack space needs to be aligned so that
4167 // taking the difference between two stack areas will result in an aligned
4168 // stack.
4169 MinReservedArea =
4170 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4171 FuncInfo->setMinReservedArea(MinReservedArea);
4172
4173 SmallVector<SDValue, 8> MemOps;
4174
4175 // If the function takes variable number of arguments, make a frame index for
4176 // the start of the first vararg value... for expansion of llvm.va_start.
4177 if (isVarArg) {
4178 static const MCPhysReg GPArgRegs[] = {
4179 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4180 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4181 };
4182 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
4183
4184 static const MCPhysReg FPArgRegs[] = {
4185 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4186 PPC::F8
4187 };
4188 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
4189
4190 if (useSoftFloat() || hasSPE())
4191 NumFPArgRegs = 0;
4192
4193 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4194 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4195
4196 // Make room for NumGPArgRegs and NumFPArgRegs.
4197 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4198 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4199
4200 FuncInfo->setVarArgsStackOffset(
4201 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4202 CCInfo.getNextStackOffset(), true));
4203
4204 FuncInfo->setVarArgsFrameIndex(
4205 MFI.CreateStackObject(Depth, Align(8), false));
4206 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4207
4208 // The fixed integer arguments of a variadic function are stored to the
4209 // VarArgsFrameIndex on the stack so that they may be loaded by
4210 // dereferencing the result of va_next.
4211 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4212 // Get an existing live-in vreg, or add a new one.
4213 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4214 if (!VReg)
4215 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4216
4217 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4218 SDValue Store =
4219 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4220 MemOps.push_back(Store);
4221 // Increment the address by four for the next argument to store
4222 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4223 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4224 }
4225
4226 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4227 // is set.
4228 // The double arguments are stored to the VarArgsFrameIndex
4229 // on the stack.
4230 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4231 // Get an existing live-in vreg, or add a new one.
4232 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4233 if (!VReg)
4234 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4235
4236 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4237 SDValue Store =
4238 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4239 MemOps.push_back(Store);
4240 // Increment the address by eight for the next argument to store
4241 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4242 PtrVT);
4243 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4244 }
4245 }
4246
4247 if (!MemOps.empty())
4248 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4249
4250 return Chain;
4251}
4252
4253// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4254// value to MVT::i64 and then truncate to the correct register size.
4255SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4256 EVT ObjectVT, SelectionDAG &DAG,
4257 SDValue ArgVal,
4258 const SDLoc &dl) const {
4259 if (Flags.isSExt())
4260 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4261 DAG.getValueType(ObjectVT));
4262 else if (Flags.isZExt())
4263 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4264 DAG.getValueType(ObjectVT));
4265
4266 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4267}
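// Example: for a 'signext i32' argument arriving in an i64 GPR, the
// AssertSext records that the upper 32 bits already hold the sign
// extension, so the following TRUNCATE to i32 is free.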
4268
4269SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4270 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4271 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4272 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4273 // TODO: add description of PPC stack frame format, or at least some docs.
4274 //
4275 bool isELFv2ABI = Subtarget.isELFv2ABI();
4276 bool isLittleEndian = Subtarget.isLittleEndian();
4277 MachineFunction &MF = DAG.getMachineFunction();
4278 MachineFrameInfo &MFI = MF.getFrameInfo();
4279 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4280
4281 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4282 "fastcc not supported on varargs functions");
4283
4284 EVT PtrVT = getPointerTy(MF.getDataLayout());
4285 // Potential tail calls could cause overwriting of argument stack slots.
4286 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4287 (CallConv == CallingConv::Fast));
4288 unsigned PtrByteSize = 8;
4289 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4290
4291 static const MCPhysReg GPR[] = {
4292 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4293 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4294 };
4295 static const MCPhysReg VR[] = {
4296 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4297 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4298 };
4299
4300 const unsigned Num_GPR_Regs = array_lengthof(GPR);
4301 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4302 const unsigned Num_VR_Regs = array_lengthof(VR);
4303
4304 // Do a first pass over the arguments to determine whether the ABI
4305 // guarantees that our caller has allocated the parameter save area
4306 // on its stack frame. In the ELFv1 ABI, this is always the case;
4307 // in the ELFv2 ABI, it is true if this is a vararg function or if
4308 // any parameter is located in a stack slot.
4309
4310 bool HasParameterArea = !isELFv2ABI || isVarArg;
4311 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4312 unsigned NumBytes = LinkageSize;
4313 unsigned AvailableFPRs = Num_FPR_Regs;
4314 unsigned AvailableVRs = Num_VR_Regs;
4315 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4316 if (Ins[i].Flags.isNest())
4317 continue;
4318
4319 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4320 PtrByteSize, LinkageSize, ParamAreaSize,
4321 NumBytes, AvailableFPRs, AvailableVRs))
4322 HasParameterArea = true;
4323 }
4324
4325 // Add DAG nodes to load the arguments or copy them out of registers. On
4326 // entry to a function on PPC, the arguments start after the linkage area,
4327 // although the first ones are often in registers.
4328
4329 unsigned ArgOffset = LinkageSize;
4330 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4331 SmallVector<SDValue, 8> MemOps;
4332 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4333 unsigned CurArgIdx = 0;
4334 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4335 SDValue ArgVal;
4336 bool needsLoad = false;
4337 EVT ObjectVT = Ins[ArgNo].VT;
4338 EVT OrigVT = Ins[ArgNo].ArgVT;
4339 unsigned ObjSize = ObjectVT.getStoreSize();
4340 unsigned ArgSize = ObjSize;
4341 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4342 if (Ins[ArgNo].isOrigArg()) {
4343 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4344 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4345 }
4346 // We re-align the argument offset for each argument, except when using the
4347 // fast calling convention, when we need to make sure we do that only when
4348 // we'll actually use a stack slot.
4349 unsigned CurArgOffset;
4350 Align Alignment;
4351 auto ComputeArgOffset = [&]() {
4352 /* Respect alignment of argument on the stack. */
4353 Alignment =
4354 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4355 ArgOffset = alignTo(ArgOffset, Alignment);
4356 CurArgOffset = ArgOffset;
4357 };
4358
4359 if (CallConv != CallingConv::Fast) {
4360 ComputeArgOffset();
4361
4362 /* Compute GPR index associated with argument offset. */
4363 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4364 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4365 }
4366
4367 // FIXME the codegen can be much improved in some cases.
4368 // We do not have to keep everything in memory.
4369 if (Flags.isByVal()) {
4370 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4371
4372 if (CallConv == CallingConv::Fast)
4373 ComputeArgOffset();
4374
4375 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4376 ObjSize = Flags.getByValSize();
4377 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4378 // Empty aggregate parameters do not take up registers. Examples:
4379 // struct { } a;
4380 // union { } b;
4381 // int c[0];
4382 // etc. However, we have to provide a place-holder in InVals, so
4383 // pretend we have an 8-byte item at the current address for that
4384 // purpose.
4385 if (!ObjSize) {
4386 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4387 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4388 InVals.push_back(FIN);
4389 continue;
4390 }
4391
4392 // Create a stack object covering all stack doublewords occupied
4393 // by the argument. If the argument is (fully or partially) on
4394 // the stack, or if the argument is fully in registers but the
4395 // caller has allocated the parameter save anyway, we can refer
4396 // directly to the caller's stack frame. Otherwise, create a
4397 // local copy in our own frame.
4398 int FI;
4399 if (HasParameterArea ||
4400 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4401 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4402 else
4403 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4404 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4405
4406 // Handle aggregates smaller than 8 bytes.
4407 if (ObjSize < PtrByteSize) {
4408 // The value of the object is its address, which differs from the
4409 // address of the enclosing doubleword on big-endian systems.
4410 SDValue Arg = FIN;
4411 if (!isLittleEndian) {
4412 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4413 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4414 }
4415 InVals.push_back(Arg);
4416
4417 if (GPR_idx != Num_GPR_Regs) {
4418 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4419 FuncInfo->addLiveInAttr(VReg, Flags);
4420 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4421 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4422 SDValue Store =
4423 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4424 MachinePointerInfo(&*FuncArg), ObjType);
4425 MemOps.push_back(Store);
4426 }
4427 // Whether we copied from a register or not, advance the offset
4428 // into the parameter save area by a full doubleword.
4429 ArgOffset += PtrByteSize;
4430 continue;
4431 }
4432
4433 // The value of the object is its address, which is the address of
4434 // its first stack doubleword.
4435 InVals.push_back(FIN);
4436
4437 // Store whatever pieces of the object are in registers to memory.
4438 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4439 if (GPR_idx == Num_GPR_Regs)
4440 break;
4441
4442 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4443 FuncInfo->addLiveInAttr(VReg, Flags);
4444 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4445 SDValue Addr = FIN;
4446 if (j) {
4447 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4448 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4449 }
4450 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4451 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4452 SDValue Store =
4453 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4454 MachinePointerInfo(&*FuncArg, j), ObjType);
4455 MemOps.push_back(Store);
4456 ++GPR_idx;
4457 }
4458 ArgOffset += ArgSize;
4459 continue;
4460 }
4461
4462 switch (ObjectVT.getSimpleVT().SimpleTy) {
4463 default: llvm_unreachable("Unhandled argument type!");
4464 case MVT::i1:
4465 case MVT::i32:
4466 case MVT::i64:
4467 if (Flags.isNest()) {
4468 // The 'nest' parameter, if any, is passed in R11.
4469 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4470 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4471
4472 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4473 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4474
4475 break;
4476 }
4477
4478 // These can be scalar arguments or elements of an integer array type
4479 // passed directly. Clang may use those instead of "byval" aggregate
4480 // types to avoid forcing arguments to memory unnecessarily.
4481 if (GPR_idx != Num_GPR_Regs) {
4482 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4483 FuncInfo->addLiveInAttr(VReg, Flags);
4484 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4485
4486 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4487 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4488 // value to MVT::i64 and then truncate to the correct register size.
4489 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4490 } else {
4491 if (CallConv == CallingConv::Fast)
4492 ComputeArgOffset();
4493
4494 needsLoad = true;
4495 ArgSize = PtrByteSize;
4496 }
4497 if (CallConv != CallingConv::Fast || needsLoad)
4498 ArgOffset += 8;
4499 break;
4500
4501 case MVT::f32:
4502 case MVT::f64:
4503 // These can be scalar arguments or elements of a float array type
4504 // passed directly. The latter are used to implement ELFv2 homogenous
4505 // float aggregates.
4506 if (FPR_idx != Num_FPR_Regs) {
4507 unsigned VReg;
4508
4509 if (ObjectVT == MVT::f32)
4510 VReg = MF.addLiveIn(FPR[FPR_idx],
4511 Subtarget.hasP8Vector()
4512 ? &PPC::VSSRCRegClass
4513 : &PPC::F4RCRegClass);
4514 else
4515 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4516 ? &PPC::VSFRCRegClass
4517 : &PPC::F8RCRegClass);
4518
4519 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4520 ++FPR_idx;
4521 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4522 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4523 // once we support fp <-> gpr moves.
4524
4525 // This can only ever happen in the presence of f32 array types,
4526 // since otherwise we never run out of FPRs before running out
4527 // of GPRs.
4528 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4529 FuncInfo->addLiveInAttr(VReg, Flags);
4530 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4531
4532 if (ObjectVT == MVT::f32) {
4533 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4534 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4535 DAG.getConstant(32, dl, MVT::i32));
4536 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4537 }
4538
4539 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4540 } else {
4541 if (CallConv == CallingConv::Fast)
4542 ComputeArgOffset();
4543
4544 needsLoad = true;
4545 }
4546
4547 // When passing an array of floats, the array occupies consecutive
4548 // space in the argument area; only round up to the next doubleword
4549 // at the end of the array. Otherwise, each float takes 8 bytes.
4550 if (CallConv != CallingConv::Fast || needsLoad) {
4551 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4552 ArgOffset += ArgSize;
4553 if (Flags.isInConsecutiveRegsLast())
4554 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4555 }
4556 break;
4557 case MVT::v4f32:
4558 case MVT::v4i32:
4559 case MVT::v8i16:
4560 case MVT::v16i8:
4561 case MVT::v2f64:
4562 case MVT::v2i64:
4563 case MVT::v1i128:
4564 case MVT::f128:
4565 // These can be scalar arguments or elements of a vector array type
4566 // passed directly. The latter are used to implement ELFv2 homogenous
4567 // vector aggregates.
4568 if (VR_idx != Num_VR_Regs) {
4569 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4570 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4571 ++VR_idx;
4572 } else {
4573 if (CallConv == CallingConv::Fast)
4574 ComputeArgOffset();
4575 needsLoad = true;
4576 }
4577 if (CallConv != CallingConv::Fast || needsLoad)
4578 ArgOffset += 16;
4579 break;
4580 }
4581
4582 // We need to load the argument to a virtual register if we determined
4583 // above that we ran out of physical registers of the appropriate type.
4584 if (needsLoad) {
4585 if (ObjSize < ArgSize && !isLittleEndian)
4586 CurArgOffset += ArgSize - ObjSize;
4587 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4588 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4589 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4590 }
4591
4592 InVals.push_back(ArgVal);
4593 }
4594
4595 // Area that is at least reserved in the caller of this function.
4596 unsigned MinReservedArea;
4597 if (HasParameterArea)
4598 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4599 else
4600 MinReservedArea = LinkageSize;
4601
4602 // Set the size that is at least reserved in caller of this function. Tail
4603 // call optimized functions' reserved stack space needs to be aligned so that
4604 // taking the difference between two stack areas will result in an aligned
4605 // stack.
4606 MinReservedArea =
4607 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4608 FuncInfo->setMinReservedArea(MinReservedArea);
4609
4610 // If the function takes variable number of arguments, make a frame index for
4611 // the start of the first vararg value... for expansion of llvm.va_start.
4612 // The ELFv2 ABI spec notes:
4613 // C programs that are intended to be *portable* across different compilers
4614 // and architectures must use the header file <stdarg.h> to deal with variable
4615 // argument lists.
4616 if (isVarArg && MFI.hasVAStart()) {
4617 int Depth = ArgOffset;
4618
4619 FuncInfo->setVarArgsFrameIndex(
4620 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4621 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4622
4623 // If this function is vararg, store any remaining integer argument regs
4624 // to their spots on the stack so that they may be loaded by dereferencing
4625 // the result of va_next.
4626 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4627 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4628 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4629 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4630 SDValue Store =
4631 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4632 MemOps.push_back(Store);
4633 // Increment the address by four for the next argument to store
4634 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4635 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4636 }
4637 }
4638
4639 if (!MemOps.empty())
4640 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4641
4642 return Chain;
4643}
4644
4645/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4646/// adjusted to accommodate the arguments for the tailcall.
4647static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4648 unsigned ParamSize) {
4649
4650 if (!isTailCall) return 0;
4651
4652 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4653 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4654 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4655 // Remember only if the new adjustment is bigger.
4656 if (SPDiff < FI->getTailCallSPDelta())
4657 FI->setTailCallSPDelta(SPDiff);
4658
4659 return SPDiff;
4660}
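// Worked example: a caller whose own incoming-argument area is 112 bytes
// tail-calling a function that needs 144 bytes of arguments yields
// SPDiff == -32, i.e. the stack must be grown by 32 bytes before the jump.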
4661
4662static bool isFunctionGlobalAddress(SDValue Callee);
4663
4664static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4665 const TargetMachine &TM) {
4666 // It does not make sense to call callsShareTOCBase() with a caller that
4667 // is PC Relative since PC Relative callers do not have a TOC.
4668#ifndef NDEBUG
4669 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4670 assert(!STICaller->isUsingPCRelativeCalls() &&
4671 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4672#endif
4673
4674 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4675 // don't have enough information to determine if the caller and callee share
4676 // the same TOC base, so we have to pessimistically assume they don't for
4677 // correctness.
4678 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4679 if (!G)
4680 return false;
4681
4682 const GlobalValue *GV = G->getGlobal();
4683
4684 // If the callee is preemptable, then the static linker will use a plt-stub
4685 // which saves the toc to the stack, and needs a nop after the call
4686 // instruction to convert to a toc-restore.
4687 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4688 return false;
4689
4690 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4691 // We may need a TOC restore in the situation where the caller requires a
4692 // valid TOC but the callee is PC Relative and does not.
4693 const Function *F = dyn_cast<Function>(GV);
4694 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4695
4696 // If we have an Alias we can try to get the function from there.
4697 if (Alias) {
4698 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4699 F = dyn_cast<Function>(GlobalObj);
4700 }
4701
4702 // If we still have no valid function pointer we do not have enough
4703 // information to determine if the callee uses PC Relative calls so we must
4704 // assume that it does.
4705 if (!F)
4706 return false;
4707
4708 // If the callee uses PC Relative we cannot guarantee that the callee won't
4709 // clobber the TOC of the caller and so we must assume that the two
4710 // functions do not share a TOC base.
4711 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4712 if (STICallee->isUsingPCRelativeCalls())
4713 return false;
4714
4715 // If the GV is not a strong definition then we need to assume it can be
4716 // replaced by another function at link time. The function that replaces
4717 // it may not share the same TOC as the caller since the callee may be
4718 // replaced by a PC Relative version of the same function.
4719 if (!GV->isStrongDefinitionForLinker())
4720 return false;
4721
4722 // The medium and large code models are expected to provide a sufficiently
4723 // large TOC to provide all data addressing needs of a module with a
4724 // single TOC.
4725 if (CodeModel::Medium == TM.getCodeModel() ||
4726 CodeModel::Large == TM.getCodeModel())
4727 return true;
4728
4729 // Any explicitly-specified sections and section prefixes must also match.
4730 // Also, if we're using -ffunction-sections, then each function is always in
4731 // a different section (the same is true for COMDAT functions).
4732 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4733 GV->getSection() != Caller->getSection())
4734 return false;
4735 if (const auto *F = dyn_cast<Function>(GV)) {
4736 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4737 return false;
4738 }
4739
4740 return true;
4741}
4742
4743static bool
4744needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4745 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4746 assert(Subtarget.is64BitELFABI());
4747
4748 const unsigned PtrByteSize = 8;
4749 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4750
4751 static const MCPhysReg GPR[] = {
4752 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4753 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4754 };
4755 static const MCPhysReg VR[] = {
4756 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4757 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4758 };
4759
4760 const unsigned NumGPRs = array_lengthof(GPR);
4761 const unsigned NumFPRs = 13;
4762 const unsigned NumVRs = array_lengthof(VR);
4763 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4764
4765 unsigned NumBytes = LinkageSize;
4766 unsigned AvailableFPRs = NumFPRs;
4767 unsigned AvailableVRs = NumVRs;
4768
4769 for (const ISD::OutputArg& Param : Outs) {
4770 if (Param.Flags.isNest()) continue;
4771
4772 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4773 LinkageSize, ParamAreaSize, NumBytes,
4774 AvailableFPRs, AvailableVRs))
4775 return true;
4776 }
4777 return false;
4778}
4779
4780static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4781 if (CB.arg_size() != CallerFn->arg_size())
4782 return false;
4783
4784 auto CalleeArgIter = CB.arg_begin();
4785 auto CalleeArgEnd = CB.arg_end();
4786 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4787
4788 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4789 const Value* CalleeArg = *CalleeArgIter;
4790 const Value* CallerArg = &(*CallerArgIter);
4791 if (CalleeArg == CallerArg)
4792 continue;
4793
4794 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4795 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4796 // }
4797 // The first argument of the callee is undef and has the same type as the caller's.
4798 if (CalleeArg->getType() == CallerArg->getType() &&
4799 isa<UndefValue>(CalleeArg))
4800 continue;
4801
4802 return false;
4803 }
4804
4805 return true;
4806}
4807
4808// Returns true if TCO is possible between the callers and callees
4809// calling conventions.
4810static bool
4811areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4812 CallingConv::ID CalleeCC) {
4813 // Tail calls are possible with fastcc and ccc.
4814 auto isTailCallableCC = [] (CallingConv::ID CC){
4815 return CC == CallingConv::C || CC == CallingConv::Fast;
4816 };
4817 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4818 return false;
4819
4820 // We can safely tail call both fastcc and ccc callees from a c calling
4821 // convention caller. If the caller is fastcc, we may have less stack space
4822 // than a non-fastcc caller with the same signature so disable tail-calls in
4823 // that case.
4824 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4825}
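// In other words: C->C, C->fastcc, and fastcc->fastcc calls are eligible;
// fastcc->C is rejected, since the fastcc caller may have reserved less
// stack than a ccc caller with the same signature.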
4826
4827bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4828 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4829 const SmallVectorImpl<ISD::OutputArg> &Outs,
4830 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4831 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4832
4833 if (DisableSCO && !TailCallOpt) return false;
4834
4835 // Variadic argument functions are not supported.
4836 if (isVarArg) return false;
4837
4838 auto &Caller = DAG.getMachineFunction().getFunction();
4839 // Check that the calling conventions are compatible for tco.
4840 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4841 return false;
4842
4843 // A caller with any byval parameter is not supported.
4844 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4845 return false;
4846
4847 // A callee with any byval parameter is not supported either.
4848 // Note: This is a quick workaround, because in some cases, e.g.
4849 // caller's stack size > callee's stack size, we are still able to apply
4850 // sibling call optimization. For example, gcc is able to do SCO for caller1
4851 // in the following example, but not for caller2.
4852 // struct test {
4853 // long int a;
4854 // char ary[56];
4855 // } gTest;
4856 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4857 // b->a = v.a;
4858 // return 0;
4859 // }
4860 // void caller1(struct test a, struct test c, struct test *b) {
4861 // callee(gTest, b); }
4862 // void caller2(struct test *b) { callee(gTest, b); }
4863 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4864 return false;
4865
4866 // If callee and caller use different calling conventions, we cannot pass
4867 // parameters on stack since offsets for the parameter area may be different.
4868 if (Caller.getCallingConv() != CalleeCC &&
4869 needStackSlotPassParameters(Subtarget, Outs))
4870 return false;
4871
4872 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4873 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4874 // callee potentially have different TOC bases then we cannot tail call since
4875 // we need to restore the TOC pointer after the call.
4876 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4877 // We cannot guarantee this for indirect calls or calls to external functions.
4878 // When PC-Relative addressing is used, the concept of the TOC is no longer
4879 // applicable so this check is not required.
4880 // Check first for indirect calls.
4881 if (!Subtarget.isUsingPCRelativeCalls() &&
4882 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4883 return false;
4884
4885 // Check if we share the TOC base.
4886 if (!Subtarget.isUsingPCRelativeCalls() &&
4887 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4888 return false;
4889
4890 // TCO allows altering callee ABI, so we don't have to check further.
4891 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4892 return true;
4893
4894 if (DisableSCO) return false;
4895
4896 // If the callee uses the same argument list as the caller, then we can
4897 // apply SCO in this case. If not, we need to check whether the callee
4898 // needs stack slots for passing arguments.
4899 // PC Relative tail calls may not have a CallBase.
4900 // If there is no CallBase we cannot verify if we have the same argument
4901 // list so assume that we don't have the same argument list.
4902 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4903 needStackSlotPassParameters(Subtarget, Outs))
4904 return false;
4905 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4906 return false;
4907
4908 return true;
4909}
4910
4911/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4912/// for tail call optimization. Targets which want to do tail call
4913/// optimization should implement this function.
4914bool
4915PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4916 CallingConv::ID CalleeCC,
4917 bool isVarArg,
4918 const SmallVectorImpl<ISD::InputArg> &Ins,
4919 SelectionDAG& DAG) const {
4920 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4921 return false;
4922
4923 // Variable argument functions are not supported.
4924 if (isVarArg)
4925 return false;
4926
4927 MachineFunction &MF = DAG.getMachineFunction();
4928 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4929 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4930 // Functions containing by val parameters are not supported.
4931 for (unsigned i = 0; i != Ins.size(); i++) {
4932 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4933 if (Flags.isByVal()) return false;
4934 }
4935
4936 // Non-PIC/GOT tail calls are supported.
4937 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4938 return true;
4939
4940 // At the moment we can only do local tail calls (in same module, hidden
4941 // or protected) if we are generating PIC.
4942 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4943 return G->getGlobal()->hasHiddenVisibility()
4944 || G->getGlobal()->hasProtectedVisibility();
4945 }
4946
4947 return false;
4948}
4949
4950/// isBLACompatibleAddress - Return the immediate to use if the specified
4951/// 32-bit value is representable in the immediate field of a BxA instruction.
4952static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4953 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4954 if (!C) return nullptr;
4955
4956 int Addr = C->getZExtValue();
4957 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4958 SignExtend32<26>(Addr) != Addr)
4959 return nullptr; // Top 6 bits have to be sext of immediate.
4960
4961 return DAG
4962 .getConstant(
4963 (int)C->getZExtValue() >> 2, SDLoc(Op),
4964 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4965 .getNode();
4966}
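// Worked example: a constant callee address of 0x2000 is word-aligned and
// sign-extends from 26 bits, so it is encodable and the returned immediate
// is 0x2000 >> 2 == 0x800; 0x4000000 fails the SignExtend32<26> test and
// yields nullptr.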
4967
4968namespace {
4969
4970struct TailCallArgumentInfo {
4971 SDValue Arg;
4972 SDValue FrameIdxOp;
4973 int FrameIdx = 0;
4974
4975 TailCallArgumentInfo() = default;
4976};
4977
4978} // end anonymous namespace
4979
4980/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4981static void StoreTailCallArgumentsToStackSlot(
4982 SelectionDAG &DAG, SDValue Chain,
4983 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4984 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4985 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4986 SDValue Arg = TailCallArgs[i].Arg;
4987 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4988 int FI = TailCallArgs[i].FrameIdx;
4989 // Store relative to framepointer.
4990 MemOpChains.push_back(DAG.getStore(
4991 Chain, dl, Arg, FIN,
4992 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4993 }
4994}
4995
4996/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4997/// the appropriate stack slot for the tail call optimized function call.
4998static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4999 SDValue OldRetAddr, SDValue OldFP,
5000 int SPDiff, const SDLoc &dl) {
5001 if (SPDiff) {
5002 // Calculate the new stack slot for the return address.
5003 MachineFunction &MF = DAG.getMachineFunction();
5004 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5005 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5006 bool isPPC64 = Subtarget.isPPC64();
5007 int SlotSize = isPPC64 ? 8 : 4;
5008 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5009 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5010 NewRetAddrLoc, true);
5011 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5012 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5013 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5014 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5015 }
5016 return Chain;
5017}
5018
5019/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5020/// the position of the argument.
5021static void
5022CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5023 SDValue Arg, int SPDiff, unsigned ArgOffset,
5024 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5025 int Offset = ArgOffset + SPDiff;
5026 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5027 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5028 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5029 SDValue FIN = DAG.getFrameIndex(FI, VT);
5030 TailCallArgumentInfo Info;
5031 Info.Arg = Arg;
5032 Info.FrameIdxOp = FIN;
5033 Info.FrameIdx = FI;
5034 TailCallArguments.push_back(Info);
5035}
5036
5037/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5038/// stack slot. Returns the chain as result and the loaded frame pointers in
5039/// LROpOut/FPOpout. Used when tail calling.
5040SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5041 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5042 SDValue &FPOpOut, const SDLoc &dl) const {
5043 if (SPDiff) {
5044 // Load the LR and FP stack slot for later adjusting.
5045 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5046 LROpOut = getReturnAddrFrameIndex(DAG);
5047 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5048 Chain = SDValue(LROpOut.getNode(), 1);
5049 }
5050 return Chain;
5051}
5052
5053/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5054/// by "Src" to address "Dst" of size "Size". Alignment information is
5055/// specified by the specific parameter attribute. The copy will be passed as
5056/// a byval function parameter.
5057/// Sometimes what we are copying is the end of a larger object, the part that
5058/// does not fit in registers.
5059static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5060 SDValue Chain, ISD::ArgFlagsTy Flags,
5061 SelectionDAG &DAG, const SDLoc &dl) {
5062 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5063 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5064 Flags.getNonZeroByValAlign(), false, false, false,
5065 MachinePointerInfo(), MachinePointerInfo());
5066}
5067
5068/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5069/// tail calls.
5070static void LowerMemOpCallTo(
5071 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5072 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5073 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5074 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5075 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5076 if (!isTailCall) {
5077 if (isVector) {
5078 SDValue StackPtr;
5079 if (isPPC64)
5080 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5081 else
5082 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5083 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5084 DAG.getConstant(ArgOffset, dl, PtrVT));
5085 }
5086 MemOpChains.push_back(
5087 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5088 // Calculate and remember argument location.
5089 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5090 TailCallArguments);
5091}
5092
5093static void
5094PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5095 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5096 SDValue FPOp,
5097 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5098 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5099 // might overwrite each other in case of tail call optimization.
5100 SmallVector<SDValue, 8> MemOpChains2;
5101 // Do not flag preceding copytoreg stuff together with the following stuff.
5102 InFlag = SDValue();
5103 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5104 MemOpChains2, dl);
5105 if (!MemOpChains2.empty())
5106 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5107
5108 // Store the return address to the appropriate stack slot.
5109 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5110
5111 // Emit callseq_end just before tailcall node.
5112 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5113 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5114 InFlag = Chain.getValue(1);
5115}
5116
5117// Is this global address that of a function that can be called by name? (as
5118// opposed to something that must hold a descriptor for an indirect call).
5119static bool isFunctionGlobalAddress(SDValue Callee) {
5120 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5121 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5122 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5123 return false;
5124
5125 return G->getGlobal()->getValueType()->isFunctionTy();
5126 }
5127
5128 return false;
5129}
5130
5131SDValue PPCTargetLowering::LowerCallResult(
5132 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5133 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5134 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5135 SmallVector<CCValAssign, 16> RVLocs;
5136 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5137 *DAG.getContext());
5138
5139 CCRetInfo.AnalyzeCallResult(
5140 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5141 ? RetCC_PPC_Cold
5142 : RetCC_PPC);
5143
5144 // Copy all of the result registers out of their specified physreg.
5145 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5146 CCValAssign &VA = RVLocs[i];
5147 assert(VA.isRegLoc() && "Can only return in registers!");
5148
5149 SDValue Val;
5150
5151 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5152 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5153 InFlag);
5154 Chain = Lo.getValue(1);
5155 InFlag = Lo.getValue(2);
5156 VA = RVLocs[++i]; // skip ahead to next loc
5157 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5158 InFlag);
5159 Chain = Hi.getValue(1);
5160 InFlag = Hi.getValue(2);
5161 if (!Subtarget.isLittleEndian())
5162 std::swap (Lo, Hi);
5163 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5164 } else {
5165 Val = DAG.getCopyFromReg(Chain, dl,
5166 VA.getLocReg(), VA.getLocVT(), InFlag);
5167 Chain = Val.getValue(1);
5168 InFlag = Val.getValue(2);
5169 }
5170
5171 switch (VA.getLocInfo()) {
5172 default: llvm_unreachable("Unknown loc info!");
5173 case CCValAssign::Full: break;
5174 case CCValAssign::AExt:
5175 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5176 break;
5177 case CCValAssign::ZExt:
5178 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5179 DAG.getValueType(VA.getValVT()));
5180 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5181 break;
5182 case CCValAssign::SExt:
5183 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5184 DAG.getValueType(VA.getValVT()));
5185 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5186 break;
5187 }
5188
5189 InVals.push_back(Val);
5190 }
5191
5192 return Chain;
5193}
5194
5195static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5196 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5197 // PatchPoint calls are not indirect.
5198 if (isPatchPoint)
5199 return false;
5200
5201 if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
5202 return false;
5203
5204 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5205 // because the immediate function pointer points to a descriptor instead of
5206 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5207 // pointer immediate points to the global entry point, while the BLA would
5208 // need to jump to the local entry point (see rL211174).
5209 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5210 isBLACompatibleAddress(Callee, DAG))
5211 return false;
5212
5213 return true;
5214}
5215
5216// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5217static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5218 return Subtarget.isAIXABI() ||
5219 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5220}
5221
5222static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5223 const Function &Caller, const SDValue &Callee,
5224 const PPCSubtarget &Subtarget,
5225 const TargetMachine &TM,
5226 bool IsStrictFPCall = false) {
5227 if (CFlags.IsTailCall)
5228 return PPCISD::TC_RETURN;
5229
5230 unsigned RetOpc = 0;
5231 // This is a call through a function pointer.
5232 if (CFlags.IsIndirect) {
5233 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5234 // indirect calls. The save of the caller's TOC pointer to the stack will be
5235 // inserted into the DAG as part of call lowering. The restore of the TOC
5236 // pointer is modeled by using a pseudo instruction for the call opcode that
5237 // represents the 2 instruction sequence of an indirect branch and link,
5238 // immediately followed by a load of the TOC pointer from the the stack save
5239 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5240 // as it is not saved or used.
5242 : PPCISD::BCTRL;
5243 } else if (Subtarget.isUsingPCRelativeCalls()) {
5244 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5245 RetOpc = PPCISD::CALL_NOTOC;
5246 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5247 // The ABIs that maintain a TOC pointer across calls need to have a nop
5248 // immediately following the call instruction if the caller and callee may
5249 // have different TOC bases. At link time, if the linker determines the calls
5250 // may not share a TOC base, the call is redirected to a trampoline inserted
5251 // by the linker. The trampoline will (among other things) save the caller's
5252 // TOC pointer at an ABI-designated offset in the linkage area, and the
5253 // linker will rewrite the nop to be a load of the TOC pointer from the
5254 // linkage area into gpr2.
5255 RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5256 : PPCISD::CALL_NOP;
5257 else
5258 RetOpc = PPCISD::CALL;
5259 if (IsStrictFPCall) {
5260 switch (RetOpc) {
5261 default:
5262 llvm_unreachable("Unknown call opcode");
5263 case PPCISD::BCTRL_LOAD_TOC:
5264 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5265 break;
5266 case PPCISD::BCTRL:
5267 RetOpc = PPCISD::BCTRL_RM;
5268 break;
5269 case PPCISD::CALL_NOTOC:
5270 RetOpc = PPCISD::CALL_NOTOC_RM;
5271 break;
5272 case PPCISD::CALL:
5273 RetOpc = PPCISD::CALL_RM;
5274 break;
5275 case PPCISD::CALL_NOP:
5276 RetOpc = PPCISD::CALL_NOP_RM;
5277 break;
5278 }
5279 }
5280 return RetOpc;
5281}
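// As an illustrative example of the CALL_NOP case above, a call that may
// cross TOC bases is emitted as (ELFv2 offsets):
//   bl callee
//   nop             ; the linker may rewrite this to: ld 2, 24(1)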
5282
5283 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5284 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5285 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5286 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5287 return SDValue(Dest, 0);
5288
5289 // Returns true if the callee is local, and false otherwise.
5290 auto isLocalCallee = [&]() {
5291 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5292 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5293 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5294
5295 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5296 !isa_and_nonnull<GlobalIFunc>(GV);
5297 };
5298
5299 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5300 // a static relocation model causes some versions of GNU LD (2.17.50, at
5301 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5302 // built with secure-PLT.
5303 bool UsePlt =
5304 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5305 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5306
5307 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5308 const TargetMachine &TM = Subtarget.getTargetMachine();
5309 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5310 MCSymbolXCOFF *S =
5311 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5312
5313 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5314 return DAG.getMCSymbol(S, PtrVT);
5315 };
5316
5317 if (isFunctionGlobalAddress(Callee)) {
5318 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5319
5320 if (Subtarget.isAIXABI()) {
5321 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5322 return getAIXFuncEntryPointSymbolSDNode(GV);
5323 }
5324 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5325 UsePlt ? PPCII::MO_PLT : 0);
5326 }
5327
5328 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5329 const char *SymName = S->getSymbol();
5330 if (Subtarget.isAIXABI()) {
5331 // If there exists a user-declared function whose name is the same as the
5332 // ExternalSymbol's, then we pick up the user-declared version.
5333 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5334 if (const Function *F =
5335 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5336 return getAIXFuncEntryPointSymbolSDNode(F);
5337
5338 // On AIX, direct function calls reference the symbol for the function's
5339 // entry point, which is named by prepending a "." before the function's
5340 // C-linkage name. A Qualname is returned here because an external
5341 // function entry point is a csect with XTY_ER property.
5342 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5343 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5344 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5345 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5346 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5347 return Sec->getQualNameSymbol();
5348 };
5349
5350 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5351 }
5352 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5353 UsePlt ? PPCII::MO_PLT : 0);
5354 }
5355
5356 // No transformation needed.
5357 assert(Callee.getNode() && "What no callee?");
5358 return Callee;
5359}
5360
5361 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5362 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5363 "Expected a CALLSEQ_STARTSDNode.");
5364
5365 // The last operand is the chain, except when the node has glue. If the node
5366 // has glue, then the last operand is the glue, and the chain is the second
5367 // last operand.
5368 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5369 if (LastValue.getValueType() != MVT::Glue)
5370 return LastValue;
5371
5372 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5373}
5374
5375 // Creates the node that moves a function's address into the count register
5376 // to prepare for an indirect call instruction.
5377 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5378 SDValue &Glue, SDValue &Chain,
5379 const SDLoc &dl) {
5380 SDValue MTCTROps[] = {Chain, Callee, Glue};
5381 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5382 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5383 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5384 // The glue is the second value produced.
5385 Glue = Chain.getValue(1);
5386}
5387
5388 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5389 SDValue &Glue, SDValue &Chain,
5390 SDValue CallSeqStart,
5391 const CallBase *CB, const SDLoc &dl,
5392 bool hasNest,
5393 const PPCSubtarget &Subtarget) {
5394 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5395 // entry point, but to the function descriptor (the function entry point
5396 // address is part of the function descriptor though).
5397 // The function descriptor is a three doubleword structure with the
5398 // following fields: function entry point, TOC base address and
5399 // environment pointer.
5400 // Thus for a call through a function pointer, the following actions need
5401 // to be performed:
5402 // 1. Save the TOC of the caller in the TOC save area of its stack
5403 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5404 // 2. Load the address of the function entry point from the function
5405 // descriptor.
5406 // 3. Load the TOC of the callee from the function descriptor into r2.
5407 // 4. Load the environment pointer from the function descriptor into
5408 // r11.
5409 // 5. Branch to the function entry point address.
5410 // 6. On return of the callee, the TOC of the caller needs to be
5411 // restored (this is done in FinishCall()).
5412 //
5413 // The loads are scheduled at the beginning of the call sequence, and the
5414 // register copies are flagged together to ensure that no other
5415 // operations can be scheduled in between. E.g. without flagging the
5416 // copies together, a TOC access in the caller could be scheduled between
5417 // the assignment of the callee TOC and the branch to the callee, which leads
5418 // to incorrect code.
5419
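// As a sketch (this struct is illustrative, not a type used in this file),
// the descriptor that the three loads below walk looks like this on a
// 64-bit descriptor-based target:
//   struct FunctionDescriptor {
//     uint64_t EntryPoint; // offset 0, read into LoadFuncPtr
//     uint64_t TOCBase;    // descriptorTOCAnchorOffset(), copied to r2
//     uint64_t EnvPointer; // descriptorEnvironmentPointerOffset(), to r11
//   };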
5420 // Start by loading the function address from the descriptor.
5421 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5422 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5423 ? (MachineMemOperand::MODereferenceable |
5424 MachineMemOperand::MOInvariant)
5425 : MachineMemOperand::MONone;
5426
5427 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5428
5429 // Registers used in building the DAG.
5430 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5431 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5432
5433 // Offsets of descriptor members.
5434 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5435 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5436
5437 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5438 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5439
5440 // One load for the function's entry point address.
5441 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5442 Alignment, MMOFlags);
5443
5444 // One for loading the TOC anchor for the module that contains the called
5445 // function.
5446 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5447 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5448 SDValue TOCPtr =
5449 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5450 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5451
5452 // One for loading the environment pointer.
5453 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5454 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5455 SDValue LoadEnvPtr =
5456 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5457 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5458
5459
5460 // Then copy the newly loaded TOC anchor to the TOC pointer.
5461 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5462 Chain = TOCVal.getValue(0);
5463 Glue = TOCVal.getValue(1);
5464
5465 // If the function call has an explicit 'nest' parameter, it takes the
5466 // place of the environment pointer.
5467 assert((!hasNest || !Subtarget.isAIXABI()) &&
5468 "Nest parameter is not supported on AIX.");
5469 if (!hasNest) {
5470 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5471 Chain = EnvVal.getValue(0);
5472 Glue = EnvVal.getValue(1);
5473 }
5474
5475 // The rest of the indirect call sequence is the same as the non-descriptor
5476 // DAG.
5477 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5478}
5479
5480static void
5481 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5482 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5483 SelectionDAG &DAG,
5484 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5485 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5486 const PPCSubtarget &Subtarget) {
5487 const bool IsPPC64 = Subtarget.isPPC64();
5488 // MVT for a general purpose register.
5489 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5490
5491 // First operand is always the chain.
5492 Ops.push_back(Chain);
5493
5494 // If it's a direct call pass the callee as the second operand.
5495 if (!CFlags.IsIndirect)
5496 Ops.push_back(Callee);
5497 else {
5498 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5499
5500 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5501 // on the stack (this would have been done in `LowerCall_64SVR4` or
5502 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5503 // represents both the indirect branch and a load that restores the TOC
5504 // pointer from the linkage area. The operand for the TOC restore is an add
5505 // of the TOC save offset to the stack pointer. This must be the second
5506 // operand: after the chain input but before any other variadic arguments.
5507 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5508 // saved or used.
5509 if (isTOCSaveRestoreRequired(Subtarget)) {
5510 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5511
5512 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5513 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5514 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5515 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5516 Ops.push_back(AddTOC);
5517 }
5518
5519 // Add the register used for the environment pointer.
5520 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5521 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5522 RegVT));
5523
5524
5525 // Add CTR register as callee so a bctr can be emitted later.
5526 if (CFlags.IsTailCall)
5527 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5528 }
5529
5530 // If this is a tail call add stack pointer delta.
5531 if (CFlags.IsTailCall)
5532 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5533
5534 // Add argument registers to the end of the list so that they are known live
5535 // into the call.
5536 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5537 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5538 RegsToPass[i].second.getValueType()));
5539
5540 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5541 // no way to mark dependencies as implicit here.
5542 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5543 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5544 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5545 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5546
5547 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5548 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5549 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5550
5551 // Add a register mask operand representing the call-preserved registers.
5552 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5553 const uint32_t *Mask =
5554 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5555 assert(Mask && "Missing call preserved mask for calling convention");
5556 Ops.push_back(DAG.getRegisterMask(Mask));
5557
5558 // If the glue is valid, it is the last operand.
5559 if (Glue.getNode())
5560 Ops.push_back(Glue);
5561}
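// For illustration, an indirect, non-tail call on a TOC-based ABI ends up
// with an operand list shaped roughly like:
//   { Chain, SP + TOCSaveOffset, [EnvPtrReg,] ArgReg0..ArgRegN,
//     TOCReg, RegMask, [Glue] }
// while a direct call simply starts with { Chain, Callee, ... }.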
5562
5563SDValue PPCTargetLowering::FinishCall(
5564 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5565 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5566 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5567 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5568 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5569
5570 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5571 Subtarget.isAIXABI())
5572 setUsesTOCBasePtr(DAG);
5573
5574 unsigned CallOpc =
5575 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5576 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5577
5578 if (!CFlags.IsIndirect)
5579 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5580 else if (Subtarget.usesFunctionDescriptors())
5581 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5582 dl, CFlags.HasNest, Subtarget);
5583 else
5584 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5585
5586 // Build the operand list for the call instruction.
5587 SmallVector<SDValue, 8> Ops;
5588 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5589 SPDiff, Subtarget);
5590
5591 // Emit tail call.
5592 if (CFlags.IsTailCall) {
5593 // Indirect tail call when using PC Relative calls do not have the same
5594 // constraints.
5595 assert(((Callee.getOpcode() == ISD::Register &&
5596 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5597 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5598 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5599 Callee.getOpcode() == ISD::MCSymbol ||
5600 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5601 "Expecting a global address, external symbol, absolute value, "
5602 "register or an indirect tail call when PC Relative calls are "
5603 "used.");
5604 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5605 assert(CallOpc == PPCISD::TC_RETURN &&
5606 "Unexpected call opcode for a tail call.");
5607 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5608 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5609 }
5610
5611 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5612 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5613 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5614 Glue = Chain.getValue(1);
5615
5616 // When performing tail call optimization the callee pops its arguments off
5617 // the stack. Account for this here so these bytes can be pushed back on in
5618 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5619 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5620 getTargetMachine().Options.GuaranteedTailCallOpt)
5621 ? NumBytes
5622 : 0;
5623
5624 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5625 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5626 Glue, dl);
5627 Glue = Chain.getValue(1);
5628
5629 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5630 DAG, InVals);
5631}
5632
5633SDValue
5634PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5635 SmallVectorImpl<SDValue> &InVals) const {
5636 SelectionDAG &DAG = CLI.DAG;
5637 SDLoc &dl = CLI.DL;
5638 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5639 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5640 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5641 SDValue Chain = CLI.Chain;
5642 SDValue Callee = CLI.Callee;
5643 bool &isTailCall = CLI.IsTailCall;
5644 CallingConv::ID CallConv = CLI.CallConv;
5645 bool isVarArg = CLI.IsVarArg;
5646 bool isPatchPoint = CLI.IsPatchPoint;
5647 const CallBase *CB = CLI.CB;
5648
5649 if (isTailCall) {
5650 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5651 isTailCall = false;
5652 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5653 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5654 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5655 else
5656 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5657 Ins, DAG);
5658 if (isTailCall) {
5659 ++NumTailCalls;
5660 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5661 ++NumSiblingCalls;
5662
5663 // PC Relative calls no longer guarantee that the callee is a Global
5664 // Address Node. The callee could be an indirect tail call in which
5665 // case the SDValue for the callee could be a load (to load the address
5666 // of a function pointer) or it may be a register copy (to move the
5667 // address of the callee from a function parameter into a virtual
5668 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5669 assert((Subtarget.isUsingPCRelativeCalls() ||
5670 isa<GlobalAddressSDNode>(Callee)) &&
5671 "Callee should be an llvm::Function object.");
5672
5673 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5674 << "\nTCO callee: ");
5675 LLVM_DEBUG(Callee.dump());
5676 }
5677 }
5678
5679 if (!isTailCall && CB && CB->isMustTailCall())
5680 report_fatal_error("failed to perform tail call elimination on a call "
5681 "site marked musttail");
5682
5683 // When long calls (i.e. indirect calls) are always used, calls are always
5684 // made via function pointer. If we have a function name, first translate it
5685 // into a pointer.
5686 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5687 !isTailCall)
5688 Callee = LowerGlobalAddress(Callee, DAG);
5689
5690 CallFlags CFlags(
5691 CallConv, isTailCall, isVarArg, isPatchPoint,
5692 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5693 // hasNest
5694 Subtarget.is64BitELFABI() &&
5695 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5696 CLI.NoMerge);
5697
5698 if (Subtarget.isAIXABI())
5699 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5700 InVals, CB);
5701
5702 assert(Subtarget.isSVR4ABI());
5703 if (Subtarget.isPPC64())
5704 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5705 InVals, CB);
5706 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5707 InVals, CB);
5708}
5709
5710SDValue PPCTargetLowering::LowerCall_32SVR4(
5711 SDValue Chain, SDValue Callee, CallFlags CFlags,
5712 const SmallVectorImpl<ISD::OutputArg> &Outs,
5713 const SmallVectorImpl<SDValue> &OutVals,
5714 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5715 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5716 const CallBase *CB) const {
5717 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5718 // of the 32-bit SVR4 ABI stack frame layout.
5719
5720 const CallingConv::ID CallConv = CFlags.CallConv;
5721 const bool IsVarArg = CFlags.IsVarArg;
5722 const bool IsTailCall = CFlags.IsTailCall;
5723
5724 assert((CallConv == CallingConv::C ||
5725 CallConv == CallingConv::Cold ||
5726 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5727
5728 const Align PtrAlign(4);
5729
5730 MachineFunction &MF = DAG.getMachineFunction();
5731
5732 // Mark this function as potentially containing a tail call. As a
5733 // consequence, the frame pointer will be used for dynamic allocas and for
5734 // restoring the caller's stack pointer in this function's epilogue. This is
5735 // done because the called function might otherwise overwrite the value in
5736 // this function's (MF) stack pointer slot at 0(SP) when tail calling.
5737 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5738 CallConv == CallingConv::Fast)
5739 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5740
5741 // Count how many bytes are to be pushed on the stack, including the linkage
5742 // area, parameter list area and the part of the local variable space which
5743 // contains copies of aggregates which are passed by value.
5744
5745 // Assign locations to all of the outgoing arguments.
5747 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5748
5749 // Reserve space for the linkage area on the stack.
5750 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5751 PtrAlign);
5752 if (useSoftFloat())
5753 CCInfo.PreAnalyzeCallOperands(Outs);
5754
5755 if (IsVarArg) {
5756 // Handle fixed and variable vector arguments differently.
5757 // Fixed vector arguments go into registers as long as registers are
5758 // available. Variable vector arguments always go into memory.
5759 unsigned NumArgs = Outs.size();
5760
5761 for (unsigned i = 0; i != NumArgs; ++i) {
5762 MVT ArgVT = Outs[i].VT;
5763 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5764 bool Result;
5765
5766 if (Outs[i].IsFixed) {
5767 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5768 CCInfo);
5769 } else {
5770 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5771 ArgFlags, CCInfo);
5772 }
5773
5774 if (Result) {
5775#ifndef NDEBUG
5776 errs() << "Call operand #" << i << " has unhandled type "
5777 << EVT(ArgVT).getEVTString() << "\n";
5778#endif
5779 llvm_unreachable(nullptr);
5780 }
5781 }
5782 } else {
5783 // All arguments are treated the same.
5784 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5785 }
5786 CCInfo.clearWasPPCF128();
5787
5788 // Assign locations to all of the outgoing aggregate by value arguments.
5789 SmallVector<CCValAssign, 16> ByValArgLocs;
5790 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5791
5792 // Reserve stack space for the allocations in CCInfo.
5793 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5794
5795 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5796
5797 // Size of the linkage area, parameter list area and the part of the local
5798 // variable space where copies of aggregates which are passed by value are
5799 // stored.
5800 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5801
5802 // Calculate by how many bytes the stack has to be adjusted in case of tail
5803 // call optimization.
5804 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5805
5806 // Adjust the stack pointer for the new arguments...
5807 // These operations are automatically eliminated by the prolog/epilog pass
5808 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5809 SDValue CallSeqStart = Chain;
5810
5811 // Load the return address and frame pointer so they can be moved somewhere
5812 // else later.
5813 SDValue LROp, FPOp;
5814 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5815
5816 // Set up a copy of the stack pointer for use loading and storing any
5817 // arguments that may not fit in the registers available for argument
5818 // passing.
5819 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5820
5821 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5822 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5823 SmallVector<SDValue, 8> MemOpChains;
5824
5825 bool seenFloatArg = false;
5826 // Walk the register/memloc assignments, inserting copies/loads.
5827 // i - Tracks the index into the list of registers allocated for the call
5828 // RealArgIdx - Tracks the index into the list of actual function arguments
5829 // j - Tracks the index into the list of byval arguments
5830 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5831 i != e;
5832 ++i, ++RealArgIdx) {
5833 CCValAssign &VA = ArgLocs[i];
5834 SDValue Arg = OutVals[RealArgIdx];
5835 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5836
5837 if (Flags.isByVal()) {
5838 // Argument is an aggregate which is passed by value, thus we need to
5839 // create a copy of it in the local variable space of the current stack
5840 // frame (which is the stack frame of the caller) and pass the address of
5841 // this copy to the callee.
5842 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5843 CCValAssign &ByValVA = ByValArgLocs[j++];
5844 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5845
5846 // Memory reserved in the local variable space of the caller's stack frame.
5847 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5848
5849 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5850 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5851 StackPtr, PtrOff);
5852
5853 // Create a copy of the argument in the local area of the current
5854 // stack frame.
5855 SDValue MemcpyCall =
5856 CreateCopyOfByValArgument(Arg, PtrOff,
5857 CallSeqStart.getNode()->getOperand(0),
5858 Flags, DAG, dl);
5859
5860 // This must go outside the CALLSEQ_START..END.
5861 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5862 SDLoc(MemcpyCall));
5863 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5864 NewCallSeqStart.getNode());
5865 Chain = CallSeqStart = NewCallSeqStart;
5866
5867 // Pass the address of the aggregate copy on the stack either in a
5868 // physical register or in the parameter list area of the current stack
5869 // frame to the callee.
5870 Arg = PtrOff;
5871 }
5872
5873 // When useCRBits() is true, there can be i1 arguments.
5874 // It is because getRegisterType(MVT::i1) => MVT::i1,
5875 // and for other integer types getRegisterType() => MVT::i32.
5876 // Extend i1 and ensure callee will get i32.
5877 if (Arg.getValueType() == MVT::i1)
5878 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5879 dl, MVT::i32, Arg);
5880
5881 if (VA.isRegLoc()) {
5882 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5883 // Put argument in a physical register.
5884 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5885 bool IsLE = Subtarget.isLittleEndian();
5886 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5887 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5888 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5889 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5890 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5891 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5892 SVal.getValue(0)));
5893 } else
5894 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5895 } else {
5896 // Put argument in the parameter list area of the current stack frame.
5897 assert(VA.isMemLoc());
5898 unsigned LocMemOffset = VA.getLocMemOffset();
5899
5900 if (!IsTailCall) {
5901 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5902 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5903 StackPtr, PtrOff);
5904
5905 MemOpChains.push_back(
5906 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5907 } else {
5908 // Calculate and remember argument location.
5909 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5910 TailCallArguments);
5911 }
5912 }
5913 }
5914
5915 if (!MemOpChains.empty())
5916 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5917
5918 // Build a sequence of copy-to-reg nodes chained together with token chain
5919 // and flag operands which copy the outgoing args into the appropriate regs.
5920 SDValue InFlag;
5921 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5922 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5923 RegsToPass[i].second, InFlag);
5924 InFlag = Chain.getValue(1);
5925 }
5926
5927 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5928 // registers.
5929 if (IsVarArg) {
5930 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5931 SDValue Ops[] = { Chain, InFlag };
5932
5933 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5934 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5935
5936 InFlag = Chain.getValue(1);
5937 }
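// Illustrative effect of the CR6 protocol above: for a vararg call such as
// printf("%f", x) the double is passed in an FPR, so CR6SET is emitted;
// for printf("%d", n) no float argument is seen and CR6UNSET is emitted.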
5938
5939 if (IsTailCall)
5940 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5941 TailCallArguments);
5942
5943 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5944 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5945}
5946
5947// Copy an argument into memory, being careful to do this outside the
5948// call sequence for the call to which the argument belongs.
5949SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5950 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5951 SelectionDAG &DAG, const SDLoc &dl) const {
5952 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5953 CallSeqStart.getNode()->getOperand(0),
5954 Flags, DAG, dl);
5955 // The MEMCPY must go outside the CALLSEQ_START..END.
5956 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5957 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5958 SDLoc(MemcpyCall));
5959 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5960 NewCallSeqStart.getNode());
5961 return NewCallSeqStart;
5962}
5963
5964SDValue PPCTargetLowering::LowerCall_64SVR4(
5965 SDValue Chain, SDValue Callee, CallFlags CFlags,
5966 const SmallVectorImpl<ISD::OutputArg> &Outs,
5967 const SmallVectorImpl<SDValue> &OutVals,
5968 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5969 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5970 const CallBase *CB) const {
5971 bool isELFv2ABI = Subtarget.isELFv2ABI();
5972 bool isLittleEndian = Subtarget.isLittleEndian();
5973 unsigned NumOps = Outs.size();
5974 bool IsSibCall = false;
5975 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5976
5977 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5978 unsigned PtrByteSize = 8;
5979
5980 MachineFunction &MF = DAG.getMachineFunction();
5981
5982 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5983 IsSibCall = true;
5984
5985 // Mark this function as potentially containing a tail call. As a
5986 // consequence, the frame pointer will be used for dynamic allocas and for
5987 // restoring the caller's stack pointer in this function's epilogue. This is
5988 // done because the called function might otherwise overwrite the value in
5989 // this function's (MF) stack pointer slot at 0(SP) when tail calling.
5990 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5991 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5992
5993 assert(!(IsFastCall && CFlags.IsVarArg) &&
5994 "fastcc not supported on varargs functions");
5995
5996 // Count how many bytes are to be pushed on the stack, including the linkage
5997 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5998 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5999 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6000 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6001 unsigned NumBytes = LinkageSize;
6002 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6003
6004 static const MCPhysReg GPR[] = {
6005 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6006 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6007 };
6008 static const MCPhysReg VR[] = {
6009 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6010 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6011 };
6012
6013 const unsigned NumGPRs = array_lengthof(GPR);
6014 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6015 const unsigned NumVRs = array_lengthof(VR);
6016
6017 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6018 // can be passed to the callee in registers.
6019 // For the fast calling convention, there is another check below.
6020 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6021 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6022 if (!HasParameterArea) {
6023 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6024 unsigned AvailableFPRs = NumFPRs;
6025 unsigned AvailableVRs = NumVRs;
6026 unsigned NumBytesTmp = NumBytes;
6027 for (unsigned i = 0; i != NumOps; ++i) {
6028 if (Outs[i].Flags.isNest()) continue;
6029 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6030 PtrByteSize, LinkageSize, ParamAreaSize,
6031 NumBytesTmp, AvailableFPRs, AvailableVRs))
6032 HasParameterArea = true;
6033 }
6034 }
6035
6036 // When using the fast calling convention, we don't provide backing for
6037 // arguments that will be in registers.
6038 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6039
6040 // Avoid allocating parameter area for fastcc functions if all the arguments
6041 // can be passed in the registers.
6042 if (IsFastCall)
6043 HasParameterArea = false;
6044
6045 // Add up all the space actually used.
6046 for (unsigned i = 0; i != NumOps; ++i) {
6047 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6048 EVT ArgVT = Outs[i].VT;
6049 EVT OrigVT = Outs[i].ArgVT;
6050
6051 if (Flags.isNest())
6052 continue;
6053
6054 if (IsFastCall) {
6055 if (Flags.isByVal()) {
6056 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6057 if (NumGPRsUsed > NumGPRs)
6058 HasParameterArea = true;
6059 } else {
6060 switch (ArgVT.getSimpleVT().SimpleTy) {
6061 default: llvm_unreachable("Unexpected ValueType for argument!");
6062 case MVT::i1:
6063 case MVT::i32:
6064 case MVT::i64:
6065 if (++NumGPRsUsed <= NumGPRs)
6066 continue;
6067 break;
6068 case MVT::v4i32:
6069 case MVT::v8i16:
6070 case MVT::v16i8:
6071 case MVT::v2f64:
6072 case MVT::v2i64:
6073 case MVT::v1i128:
6074 case MVT::f128:
6075 if (++NumVRsUsed <= NumVRs)
6076 continue;
6077 break;
6078 case MVT::v4f32:
6079 if (++NumVRsUsed <= NumVRs)
6080 continue;
6081 break;
6082 case MVT::f32:
6083 case MVT::f64:
6084 if (++NumFPRsUsed <= NumFPRs)
6085 continue;
6086 break;
6087 }
6088 HasParameterArea = true;
6089 }
6090 }
6091
6092 /* Respect alignment of argument on the stack. */
6093 auto Alignment =
6094 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6095 NumBytes = alignTo(NumBytes, Alignment);
6096
6097 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6098 if (Flags.isInConsecutiveRegsLast())
6099 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6100 }
6101
6102 unsigned NumBytesActuallyUsed = NumBytes;
6103
6104 // In the old ELFv1 ABI,
6105 // the prolog code of the callee may store up to 8 GPR argument registers to
6106 // the stack, allowing va_start to index over them in memory if the function
6107 // is varargs.
6107 // Because we cannot tell if this is needed on the caller side, we have to
6108 // conservatively assume that it is needed. As such, make sure we have at
6109 // least enough stack space for the caller to store the 8 GPRs.
6110 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6111 // really requires memory operands, e.g. a vararg function.
6112 if (HasParameterArea)
6113 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6114 else
6115 NumBytes = LinkageSize;
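// A worked example (illustrative) of the sizing above: on ELFv2 with a
// 32-byte linkage area, a call passing (i64, double, <4 x i32>) that needs
// the parameter area places slots at offsets 32, 40, and 48 (aligned to
// 16), giving NumBytes = 64, which the max() above then raises to
// 32 + 8 * 8 = 96 so the callee can back all 8 GPR argument registers.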
6116
6117 // Tail call needs the stack to be aligned.
6118 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6119 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6120
6121 int SPDiff = 0;
6122
6123 // Calculate by how many bytes the stack has to be adjusted in case of tail
6124 // call optimization.
6125 if (!IsSibCall)
6126 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6127
6128 // To protect arguments on the stack from being clobbered in a tail call,
6129 // force all the loads to happen before doing any other lowering.
6130 if (CFlags.IsTailCall)
6131 Chain = DAG.getStackArgumentTokenFactor(Chain);
6132
6133 // Adjust the stack pointer for the new arguments...
6134 // These operations are automatically eliminated by the prolog/epilog pass
6135 if (!IsSibCall)
6136 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6137 SDValue CallSeqStart = Chain;
6138
6139 // Load the return address and frame pointer so they can be moved somewhere
6140 // else later.
6141 SDValue LROp, FPOp;
6142 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6143
6144 // Set up a copy of the stack pointer for use loading and storing any
6145 // arguments that may not fit in the registers available for argument
6146 // passing.
6147 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6148
6149 // Figure out which arguments are going to go in registers, and which in
6150 // memory. Also, if this is a vararg function, floating point operations
6151 // must be stored to our stack, and loaded into integer regs as well, if
6152 // any integer regs are available for argument passing.
6153 unsigned ArgOffset = LinkageSize;
6154
6156 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6157
6158 SmallVector<SDValue, 8> MemOpChains;
6159 for (unsigned i = 0; i != NumOps; ++i) {
6160 SDValue Arg = OutVals[i];
6161 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6162 EVT ArgVT = Outs[i].VT;
6163 EVT OrigVT = Outs[i].ArgVT;
6164
6165 // PtrOff will be used to store the current argument to the stack if a
6166 // register cannot be found for it.
6167 SDValue PtrOff;
6168
6169 // We re-align the argument offset for each argument, except when using the
6170 // fast calling convention, when we need to make sure we do that only when
6171 // we'll actually use a stack slot.
6172 auto ComputePtrOff = [&]() {
6173 /* Respect alignment of argument on the stack. */
6174 auto Alignment =
6175 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6176 ArgOffset = alignTo(ArgOffset, Alignment);
6177
6178 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6179
6180 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6181 };
6182
6183 if (!IsFastCall) {
6184 ComputePtrOff();
6185
6186 /* Compute GPR index associated with argument offset. */
6187 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6188 GPR_idx = std::min(GPR_idx, NumGPRs);
6189 }
6190
6191 // Promote integers to 64-bit values.
6192 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6193 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6194 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6195 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6196 }
6197
6198 // FIXME memcpy is used way more than necessary. Correctness first.
6199 // Note: "by value" is code for passing a structure by value, not
6200 // basic types.
6201 if (Flags.isByVal()) {
6202 // Note: Size includes alignment padding, so
6203 // struct x { short a; char b; }
6204 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6205 // These are the proper values we need for right-justifying the
6206 // aggregate in a parameter register.
6207 unsigned Size = Flags.getByValSize();
6208
6209 // An empty aggregate parameter takes up no storage and no
6210 // registers.
6211 if (Size == 0)
6212 continue;
6213
6214 if (IsFastCall)
6215 ComputePtrOff();
6216
6217 // All aggregates smaller than 8 bytes must be passed right-justified.
6218 if (Size==1 || Size==2 || Size==4) {
6219 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6220 if (GPR_idx != NumGPRs) {
6221 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6222 MachinePointerInfo(), VT);
6223 MemOpChains.push_back(Load.getValue(1));
6224 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6225
6226 ArgOffset += PtrByteSize;
6227 continue;
6228 }
6229 }
6230
6231 if (GPR_idx == NumGPRs && Size < 8) {
6232 SDValue AddPtr = PtrOff;
6233 if (!isLittleEndian) {
6234 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6235 PtrOff.getValueType());
6236 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6237 }
6238 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6239 CallSeqStart,
6240 Flags, DAG, dl);
6241 ArgOffset += PtrByteSize;
6242 continue;
6243 }
6244 // Copy the object to the parameter save area if it cannot be passed
6245 // entirely in registers.
6246 // FIXME: we only need to copy the parts which need to be passed in
6247 // parameter save area. For the parts passed by registers, we don't need
6248 // to copy them to the stack although we need to allocate space for them
6249 // in parameter save area.
6250 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6251 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6252 CallSeqStart,
6253 Flags, DAG, dl);
6254
6255 // When a register is available, pass a small aggregate right-justified.
6256 if (Size < 8 && GPR_idx != NumGPRs) {
6257 // The easiest way to get this right-justified in a register
6258 // is to copy the structure into the rightmost portion of a
6259 // local variable slot, then load the whole slot into the
6260 // register.
6261 // FIXME: The memcpy seems to produce pretty awful code for
6262 // small aggregates, particularly for packed ones.
6263 // FIXME: It would be preferable to use the slot in the
6264 // parameter save area instead of a new local variable.
6265 SDValue AddPtr = PtrOff;
6266 if (!isLittleEndian) {
6267 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6268 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6269 }
6270 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6271 CallSeqStart,
6272 Flags, DAG, dl);
6273
6274 // Load the slot into the register.
6275 SDValue Load =
6276 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6277 MemOpChains.push_back(Load.getValue(1));
6278 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6279
6280 // Done with this argument.
6281 ArgOffset += PtrByteSize;
6282 continue;
6283 }
6284
6285 // For aggregates larger than PtrByteSize, copy the pieces of the
6286 // object that fit into registers from the parameter save area.
6287 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6288 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6289 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6290 if (GPR_idx != NumGPRs) {
6291 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6292 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6293 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6294 MachinePointerInfo(), ObjType);
6295
6296 MemOpChains.push_back(Load.getValue(1));
6297 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6298 ArgOffset += PtrByteSize;
6299 } else {
6300 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6301 break;
6302 }
6303 }
6304 continue;
6305 }
6306
6307 switch (Arg.getSimpleValueType().SimpleTy) {
6308 default: llvm_unreachable("Unexpected ValueType for argument!");
6309 case MVT::i1:
6310 case MVT::i32:
6311 case MVT::i64:
6312 if (Flags.isNest()) {
6313 // The 'nest' parameter, if any, is passed in R11.
6314 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6315 break;
6316 }
6317
6318 // These can be scalar arguments or elements of an integer array type
6319 // passed directly. Clang may use those instead of "byval" aggregate
6320 // types to avoid forcing arguments to memory unnecessarily.
6321 if (GPR_idx != NumGPRs) {
6322 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6323 } else {
6324 if (IsFastCall)
6325 ComputePtrOff();
6326
6327 assert(HasParameterArea &&
6328 "Parameter area must exist to pass an argument in memory.");
6329 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6330 true, CFlags.IsTailCall, false, MemOpChains,
6331 TailCallArguments, dl);
6332 if (IsFastCall)
6333 ArgOffset += PtrByteSize;
6334 }
6335 if (!IsFastCall)
6336 ArgOffset += PtrByteSize;
6337 break;
6338 case MVT::f32:
6339 case MVT::f64: {
6340 // These can be scalar arguments or elements of a float array type
6341 // passed directly. The latter are used to implement ELFv2 homogeneous
6342 // float aggregates.
6343
6344 // Named arguments go into FPRs first, and once they overflow, the
6345 // remaining arguments go into GPRs and then the parameter save area.
6346 // Unnamed arguments for vararg functions always go to GPRs and
6347 // then the parameter save area. For now, put all arguments to vararg
6348 // routines always in both locations (FPR *and* GPR or stack slot).
6349 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6350 bool NeededLoad = false;
6351
6352 // First load the argument into the next available FPR.
6353 if (FPR_idx != NumFPRs)
6354 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6355
6356 // Next, load the argument into GPR or stack slot if needed.
6357 if (!NeedGPROrStack)
6358 ;
6359 else if (GPR_idx != NumGPRs && !IsFastCall) {
6360 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6361 // once we support fp <-> gpr moves.
6362
6363 // In the non-vararg case, this can only ever happen in the
6364 // presence of f32 array types, since otherwise we never run
6365 // out of FPRs before running out of GPRs.
6366 SDValue ArgVal;
6367
6368 // Double values are always passed in a single GPR.
6369 if (Arg.getValueType() != MVT::f32) {
6370 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6371
6372 // Non-array float values are extended and passed in a GPR.
6373 } else if (!Flags.isInConsecutiveRegs()) {
6374 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6375 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6376
6377 // If we have an array of floats, we collect every odd element
6378 // together with its predecessor into one GPR.
6379 } else if (ArgOffset % PtrByteSize != 0) {
6380 SDValue Lo, Hi;
6381 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6382 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6383 if (!isLittleEndian)
6384 std::swap(Lo, Hi);
6385 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6386
6387 // The final element, if even, goes into the first half of a GPR.
6388 } else if (Flags.isInConsecutiveRegsLast()) {
6389 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6390 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6391 if (!isLittleEndian)
6392 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6393 DAG.getConstant(32, dl, MVT::i32));
6394
6395 // Non-final even elements are skipped; they will be handled
6396 // together with the subsequent argument on the next go-around.
6397 } else
6398 ArgVal = SDValue();
6399
6400 if (ArgVal.getNode())
6401 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6402 } else {
6403 if (IsFastCall)
6404 ComputePtrOff();
6405
6406 // Single-precision floating-point values are mapped to the
6407 // second (rightmost) word of the stack doubleword.
6408 if (Arg.getValueType() == MVT::f32 &&
6409 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6410 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6411 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6412 }
6413
6414 assert(HasParameterArea &&
6415 "Parameter area must exist to pass an argument in memory.");
6416 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6417 true, CFlags.IsTailCall, false, MemOpChains,
6418 TailCallArguments, dl);
6419
6420 NeededLoad = true;
6421 }
6422 // When passing an array of floats, the array occupies consecutive
6423 // space in the argument area; only round up to the next doubleword
6424 // at the end of the array. Otherwise, each float takes 8 bytes.
6425 if (!IsFastCall || NeededLoad) {
6426 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6427 Flags.isInConsecutiveRegs()) ? 4 : 8;
6428 if (Flags.isInConsecutiveRegsLast())
6429 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6430 }
6431 break;
6432 }
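// Illustrative example of the f32-array handling above, for the case where
// the FPRs have run out: with consecutive floats a, b, c on big-endian,
// 'a' is skipped on its own iteration, then paired with 'b' into one i64
// GPR ('a' in the high word), and 'c', the final even element, is shifted
// into the high word of the next GPR by the SHL-by-32 path.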
6433 case MVT::v4f32:
6434 case MVT::v4i32:
6435 case MVT::v8i16:
6436 case MVT::v16i8:
6437 case MVT::v2f64:
6438 case MVT::v2i64:
6439 case MVT::v1i128:
6440 case MVT::f128:
6441 // These can be scalar arguments or elements of a vector array type
6442 // passed directly. The latter are used to implement ELFv2 homogeneous
6443 // vector aggregates.
6444
6445 // For a varargs call, named arguments go into VRs or on the stack as
6446 // usual; unnamed arguments always go to the stack or the corresponding
6447 // GPRs when within range. For now, we always put the value in both
6448 // locations (or even all three).
6449 if (CFlags.IsVarArg) {
6450 assert(HasParameterArea &&
6451 "Parameter area must exist if we have a varargs call.");
6452 // We could elide this store in the case where the object fits
6453 // entirely in R registers. Maybe later.
6454 SDValue Store =
6455 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6456 MemOpChains.push_back(Store);
6457 if (VR_idx != NumVRs) {
6458 SDValue Load =
6459 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6460 MemOpChains.push_back(Load.getValue(1));
6461 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6462 }
6463 ArgOffset += 16;
6464 for (unsigned i=0; i<16; i+=PtrByteSize) {
6465 if (GPR_idx == NumGPRs)
6466 break;
6467 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6468 DAG.getConstant(i, dl, PtrVT));
6469 SDValue Load =
6470 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6471 MemOpChains.push_back(Load.getValue(1));
6472 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6473 }
6474 break;
6475 }
6476
6477 // Non-varargs Altivec params go into VRs or on the stack.
6478 if (VR_idx != NumVRs) {
6479 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6480 } else {
6481 if (IsFastCall)
6482 ComputePtrOff();
6483
6484 assert(HasParameterArea &&
6485 "Parameter area must exist to pass an argument in memory.");
6486 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6487 true, CFlags.IsTailCall, true, MemOpChains,
6488 TailCallArguments, dl);
6489 if (IsFastCall)
6490 ArgOffset += 16;
6491 }
6492
6493 if (!IsFastCall)
6494 ArgOffset += 16;
6495 break;
6496 }
6497 }
6498
6499 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6500 "mismatch in size of parameter area");
6501 (void)NumBytesActuallyUsed;
6502
6503 if (!MemOpChains.empty())
6504 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6505
6506 // Check if this is an indirect call (MTCTR/BCTRL).
6507 // See prepareDescriptorIndirectCall and buildCallOperands for more
6508 // information about calls through function pointers in the 64-bit SVR4 ABI.
6509 if (CFlags.IsIndirect) {
6510 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6511 // caller in the TOC save area.
6512 if (isTOCSaveRestoreRequired(Subtarget)) {
6513 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6514 // Load r2 into a virtual register and store it to the TOC save area.
6515 setUsesTOCBasePtr(DAG);
6516 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6517 // TOC save area offset.
6518 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6519 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6520 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6521 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6522 MachinePointerInfo::getStack(
6523 DAG.getMachineFunction(), TOCSaveOffset));
6524 }
6525 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6526 // This does not mean the MTCTR instruction must use R12; it's easier
6527 // to model this as an extra parameter, so do that.
6528 if (isELFv2ABI && !CFlags.IsPatchPoint)
6529 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6530 }
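// Roughly, the X12 convention above materializes as (illustrative):
//   mr 12, <callee address>
//   mtctr 12
//   bctrl
// so the callee's global entry point can compute its TOC base from r12.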
6531
6532 // Build a sequence of copy-to-reg nodes chained together with token chain
6533 // and flag operands which copy the outgoing args into the appropriate regs.
6534 SDValue InFlag;
6535 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6536 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6537 RegsToPass[i].second, InFlag);
6538 InFlag = Chain.getValue(1);
6539 }
6540
6541 if (CFlags.IsTailCall && !IsSibCall)
6542 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6543 TailCallArguments);
6544
6545 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6546 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6547}
6548
6549// Returns true when the shadow of a general purpose argument register
6550// in the parameter save area is aligned to at least 'RequiredAlign'.
6551static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6552 assert(RequiredAlign.value() <= 16 &&
6553 "Required alignment greater than stack alignment.");
6554 switch (Reg) {
6555 default:
6556 report_fatal_error("called on invalid register.");
6557 case PPC::R5:
6558 case PPC::R9:
6559 case PPC::X3:
6560 case PPC::X5:
6561 case PPC::X7:
6562 case PPC::X9:
6563 // The shadows of these registers are 16-byte aligned, which is the
6564 // strictest alignment we can support.
6565 return true;
6566 case PPC::R3:
6567 case PPC::R7:
6568 case PPC::X4:
6569 case PPC::X6:
6570 case PPC::X8:
6571 case PPC::X10:
6572 // The shadow of these registers in the PSA is 8 byte aligned.
6573 return RequiredAlign <= 8;
6574 case PPC::R4:
6575 case PPC::R6:
6576 case PPC::R8:
6577 case PPC::R10:
6578 return RequiredAlign <= 4;
6579 }
6580}
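// Worked mapping for the switch above on 64-bit AIX (48-byte linkage
// area): X3 shadows the doubleword at offset 48 (16-byte aligned), X4 the
// one at 56 (only 8-byte aligned), X5 at 64 (16-byte aligned), and so on,
// alternating; hence the two groups of 64-bit registers.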
6581
6582static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6583 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6584 CCState &S) {
6585 AIXCCState &State = static_cast<AIXCCState &>(S);
6586 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6587 State.getMachineFunction().getSubtarget());
6588 const bool IsPPC64 = Subtarget.isPPC64();
6589 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6590 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6591
6592 if (ValVT == MVT::f128)
6593 report_fatal_error("f128 is unimplemented on AIX.");
6594
6595 if (ArgFlags.isNest())
6596 report_fatal_error("Nest arguments are unimplemented.");
6597
6598 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6599 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6600 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6601 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6602 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6603 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6604
6605 static const MCPhysReg VR[] = {// Vector registers.
6606 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6607 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6608 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6609
6610 if (ArgFlags.isByVal()) {
6611 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6612 report_fatal_error("Pass-by-value arguments with alignment greater than "
6613 "register width are not supported.");
6614
6615 const unsigned ByValSize = ArgFlags.getByValSize();
6616
6617 // An empty aggregate parameter takes up no storage and no registers,
6618 // but needs a MemLoc for a stack slot for the formal arguments side.
6619 if (ByValSize == 0) {
6620 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6621 State.getNextStackOffset(), RegVT,
6622 LocInfo));
6623 return false;
6624 }
6625
6626 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6627 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6628 for (const unsigned E = Offset + StackSize; Offset < E;
6629 Offset += PtrAlign.value()) {
6630 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6631 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6632 else {
6633 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6634 Offset, RegVT,
6635 LocInfo));
6636 break;
6637 }
6638 }
6639 return false;
6640 }
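// Illustrative trace of the byval loop above: a 12-byte byval on PPC64
// rounds up to StackSize = 16 and wants two GPRs; if only one GPR remains,
// the first doubleword is assigned to it and the remainder gets a single
// MemLoc before the loop breaks.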
6641
6642 // Arguments always reserve parameter save area.
6643 switch (ValVT.SimpleTy) {
6644 default:
6645 report_fatal_error("Unhandled value type for argument.");
6646 case MVT::i64:
6647 // i64 arguments should have been split to i32 for PPC32.
6648 assert(IsPPC64 && "PPC32 should have split i64 values.");
6649 LLVM_FALLTHROUGH;
6650 case MVT::i1:
6651 case MVT::i32: {
6652 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6653 // AIX integer arguments are always passed in register width.
6654 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6655 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6656 : CCValAssign::LocInfo::ZExt;
6657 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6658 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6659 else
6660 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6661
6662 return false;
6663 }
6664 case MVT::f32:
6665 case MVT::f64: {
6666 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6667 const unsigned StoreSize = LocVT.getStoreSize();
6668 // Floats are always 4-byte aligned in the PSA on AIX.
6669 // This includes f64 in 64-bit mode for ABI compatibility.
6670 const unsigned Offset =
6671 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6672 unsigned FReg = State.AllocateReg(FPR);
6673 if (FReg)
6674 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6675
6676 // Reserve and initialize GPRs or initialize the PSA as required.
6677 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6678 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6679 assert(FReg && "An FPR should be available when a GPR is reserved.");
6680 if (State.isVarArg()) {
6681 // Successfully reserved GPRs are only initialized for vararg calls.
6682 // Custom handling is required for:
6683 // f64 in PPC32 needs to be split into 2 GPRs.
6684 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6685 State.addLoc(
6686 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6687 }
6688 } else {
6689 // If there are insufficient GPRs, the PSA needs to be initialized.
6690 // Initialization occurs even if an FPR was initialized for
6691 // compatibility with the AIX XL compiler. The full memory for the
6692 // argument will be initialized even if a prior word is saved in GPR.
6693 // A custom memLoc is used when the argument also passes in FPR so
6694 // that the callee handling can skip over it easily.
6695 State.addLoc(
6696 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6697 LocInfo)
6698 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6699 break;
6700 }
6701 }
6702
6703 return false;
6704 }
6705 case MVT::v4f32:
6706 case MVT::v4i32:
6707 case MVT::v8i16:
6708 case MVT::v16i8:
6709 case MVT::v2i64:
6710 case MVT::v2f64:
6711 case MVT::v1i128: {
6712 const unsigned VecSize = 16;
6713 const Align VecAlign(VecSize);
6714
6715 if (!State.isVarArg()) {
6716 // If there are vector registers remaining we don't consume any stack
6717 // space.
6718 if (unsigned VReg = State.AllocateReg(VR)) {
6719 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6720 return false;
6721 }
6722 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6723 // might be allocated in the portion of the PSA that is shadowed by the
6724 // GPRs.
6725 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6726 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6727 return false;
6728 }
6729
6730 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6731 ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6732
6733 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6734 // Burn any underaligned registers and their shadowed stack space until
6735 // we reach the required alignment.
6736 while (NextRegIndex != GPRs.size() &&
6737 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6738 // Shadow allocate register and its stack shadow.
6739 unsigned Reg = State.AllocateReg(GPRs);
6740 State.AllocateStack(PtrSize, PtrAlign);
6741 assert(Reg && "Allocating register unexpectedly failed.");
6742 (void)Reg;
6743 NextRegIndex = State.getFirstUnallocated(GPRs);
6744 }
6745
6746 // Vectors that are passed as fixed arguments are handled differently.
6747 // They are passed in VRs if any are available (unlike arguments passed
6748 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6749 // functions).
6750 if (State.isFixed(ValNo)) {
6751 if (unsigned VReg = State.AllocateReg(VR)) {
6752 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6753 // Shadow allocate GPRs and stack space even though we pass in a VR.
6754 for (unsigned I = 0; I != VecSize; I += PtrSize)
6755 State.AllocateReg(GPRs);
6756 State.AllocateStack(VecSize, VecAlign);
6757 return false;
6758 }
6759 // No vector registers remain so pass on the stack.
6760 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6761 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6762 return false;
6763 }
6764
6765 // If all GPRS are consumed then we pass the argument fully on the stack.
6766 if (NextRegIndex == GPRs.size()) {
6767 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6768 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6769 return false;
6770 }
6771
6772 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6773 // half of the argument, and then need to pass the remaining half on the
6774 // stack.
6775 if (GPRs[NextRegIndex] == PPC::R9) {
6776 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6777 State.addLoc(
6778 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6779
6780 const unsigned FirstReg = State.AllocateReg(PPC::R9);
6781 const unsigned SecondReg = State.AllocateReg(PPC::R10);
6782 assert(FirstReg && SecondReg &&
6783 "Allocating R9 or R10 unexpectedly failed.");
6784 State.addLoc(
6785 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6786 State.addLoc(
6787 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6788 return false;
6789 }
6790
6791 // We have enough GPRs to fully pass the vector argument, and we have
6792 // already consumed any underaligned registers. Start with the custom
6793 // MemLoc and then the custom RegLocs.
6794 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6795 State.addLoc(
6796 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6797 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6798 const unsigned Reg = State.AllocateReg(GPRs);
6799 assert(Reg && "Failed to allocate register for vararg vector argument");
6800 State.addLoc(
6801 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6802 }
6803 return false;
6804 }
6805 }
6806 return true;
6807}
6808
6809 // So far, this function is only used by LowerFormalArguments_AIX()
6810 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6811 bool IsPPC64,
6812 bool HasP8Vector,
6813 bool HasVSX) {
6814 assert((IsPPC64 || SVT != MVT::i64) &&
6815 "i64 should have been split for 32-bit codegen.");
6816
6817 switch (SVT) {
6818 default:
6819 report_fatal_error("Unexpected value type for formal argument");
6820 case MVT::i1:
6821 case MVT::i32:
6822 case MVT::i64:
6823 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6824 case MVT::f32:
6825 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6826 case MVT::f64:
6827 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6828 case MVT::v4f32:
6829 case MVT::v4i32:
6830 case MVT::v8i16:
6831 case MVT::v16i8:
6832 case MVT::v2i64:
6833 case MVT::v2f64:
6834 case MVT::v1i128:
6835 return &PPC::VRRCRegClass;
6836 }
6837}
6838
6839 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6840 SelectionDAG &DAG, SDValue ArgValue,
6841 MVT LocVT, const SDLoc &dl) {
6842 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6843 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6844
6845 if (Flags.isSExt())
6846 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6847 DAG.getValueType(ValVT));
6848 else if (Flags.isZExt())
6849 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6850 DAG.getValueType(ValVT));
6851
6852 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6853}
6854
6855static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6856 const unsigned LASize = FL->getLinkageSize();
6857
6858 if (PPC::GPRCRegClass.contains(Reg)) {
6859 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6860 "Reg must be a valid argument register!");
6861 return LASize + 4 * (Reg - PPC::R3);
6862 }
6863
6864 if (PPC::G8RCRegClass.contains(Reg)) {
6865 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6866 "Reg must be a valid argument register!");
6867 return LASize + 8 * (Reg - PPC::X3);
6868 }
6869
6870 llvm_unreachable("Only general purpose registers expected.");
6871}
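// Editorial example (not in the original source): with the 24-byte 32-bit
// linkage area, R5 maps to 24 + 4 * (R5 - R3) = 32; with the 48-byte 64-bit
// linkage area, X5 maps to 48 + 8 * (X5 - X3) = 64.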
6872
6873// AIX ABI Stack Frame Layout:
6874//
6875// Low Memory +--------------------------------------------+
6876// SP +---> | Back chain | ---+
6877// | +--------------------------------------------+ |
6878// | | Saved Condition Register | |
6879// | +--------------------------------------------+ |
6880// | | Saved Linkage Register | |
6881// | +--------------------------------------------+ | Linkage Area
6882// | | Reserved for compilers | |
6883// | +--------------------------------------------+ |
6884// | | Reserved for binders | |
6885// | +--------------------------------------------+ |
6886// | | Saved TOC pointer | ---+
6887// | +--------------------------------------------+
6888// | | Parameter save area |
6889// | +--------------------------------------------+
6890// | | Alloca space |
6891// | +--------------------------------------------+
6892// | | Local variable space |
6893// | +--------------------------------------------+
6894// | | Float/int conversion temporary |
6895// | +--------------------------------------------+
6896// | | Save area for AltiVec registers |
6897// | +--------------------------------------------+
6898// | | AltiVec alignment padding |
6899// | +--------------------------------------------+
6900// | | Save area for VRSAVE register |
6901// | +--------------------------------------------+
6902// | | Save area for General Purpose registers |
6903// | +--------------------------------------------+
6904// | | Save area for Floating Point registers |
6905// | +--------------------------------------------+
6906// +---- | Back chain |
6907// High Memory +--------------------------------------------+
6908//
6909// Specifications:
6910// AIX 7.2 Assembler Language Reference
6911// Subroutine linkage convention
6912
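// Editorial note (not in the original source): the linkage area at the top
// of the frame is 24 bytes (6 x 4) on 32-bit AIX and 48 bytes (6 x 8) on
// 64-bit AIX; see the LSA comment in LowerCall_AIX below.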
6913SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6914 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6915 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6916 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6917
6918 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6919 CallConv == CallingConv::Fast) &&
6920 "Unexpected calling convention!");
6921
6922 if (getTargetMachine().Options.GuaranteedTailCallOpt)
6923 report_fatal_error("Tail call support is unimplemented on AIX.");
6924
6925 if (useSoftFloat())
6926 report_fatal_error("Soft float support is unimplemented on AIX.");
6927
6928 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
6929
6930 const bool IsPPC64 = Subtarget.isPPC64();
6931 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6932
6933 // Assign locations to all of the incoming arguments.
6934 SmallVector<CCValAssign, 16> ArgLocs;
6935 MachineFunction &MF = DAG.getMachineFunction();
6936 MachineFrameInfo &MFI = MF.getFrameInfo();
6937 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6938 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6939
6940 const EVT PtrVT = getPointerTy(MF.getDataLayout());
6941 // Reserve space for the linkage area on the stack.
6942 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6943 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6944 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6945
6946 SmallVector<SDValue, 8> MemOps;
6947
6948 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6949 CCValAssign &VA = ArgLocs[I++];
6950 MVT LocVT = VA.getLocVT();
6951 MVT ValVT = VA.getValVT();
6952 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6953 // For compatibility with the AIX XL compiler, the float args in the
6954 // parameter save area are initialized even if the argument is available
6955 // in register. The caller is required to initialize both the register
6956 // and memory, however, the callee can choose to expect it in either.
6957 // The memloc is dismissed here because the argument is retrieved from
6958 // the register.
6959 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
6960 continue;
6961
6962 auto HandleMemLoc = [&]() {
6963 const unsigned LocSize = LocVT.getStoreSize();
6964 const unsigned ValSize = ValVT.getStoreSize();
6965 assert((ValSize <= LocSize) &&
6966 "Object size is larger than size of MemLoc");
6967 int CurArgOffset = VA.getLocMemOffset();
6968 // Objects are right-justified because AIX is big-endian.
6969 if (LocSize > ValSize)
6970 CurArgOffset += LocSize - ValSize;
6971 // Potential tail calls could cause overwriting of argument stack slots.
6972 const bool IsImmutable =
6973 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
6974 (CallConv == CallingConv::Fast));
6975 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6976 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6977 SDValue ArgValue =
6978 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6979 InVals.push_back(ArgValue);
6980 };
6981
6982 // Vector arguments to VaArg functions are passed both on the stack, and
6983 // in any available GPRs. Load the value from the stack and add the GPRs
6984 // as live ins.
6985 if (VA.isMemLoc() && VA.needsCustom()) {
6986 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
6987 assert(isVarArg && "Only use custom memloc for vararg.");
6988 // Remember the ValNo of the custom MemLoc, so we can compare it to the
6989 // ValNo of the matching custom RegLocs.
6990 const unsigned OriginalValNo = VA.getValNo();
6991 (void)OriginalValNo;
6992
6993 auto HandleCustomVecRegLoc = [&]() {
6994 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
6995 "Missing custom RegLoc.");
6996 VA = ArgLocs[I++];
6997 assert(VA.getValVT().isVector() &&
6998 "Unexpected Val type for custom RegLoc.");
6999 assert(VA.getValNo() == OriginalValNo &&
7000 "ValNo mismatch between custom MemLoc and RegLoc.");
7002 MF.addLiveIn(VA.getLocReg(),
7003 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7004 Subtarget.hasVSX()));
7005 };
7006
7007 HandleMemLoc();
7008 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7009 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7010 // R10.
7011 HandleCustomVecRegLoc();
7012 HandleCustomVecRegLoc();
7013
7014 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7015 // we passed the vector in R5, R6, R7 and R8.
7016 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7017 assert(!IsPPC64 &&
7018 "Only 2 custom RegLocs expected for 64-bit codegen.");
7019 HandleCustomVecRegLoc();
7020 HandleCustomVecRegLoc();
7021 }
7022
7023 continue;
7024 }
7025
7026 if (VA.isRegLoc()) {
7027 if (VA.getValVT().isScalarInteger())
7028 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7029 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7030 switch (VA.getValVT().SimpleTy) {
7031 default:
7032 report_fatal_error("Unhandled value type for argument.");
7033 case MVT::f32:
7034 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7035 break;
7036 case MVT::f64:
7037 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7038 break;
7039 }
7040 } else if (VA.getValVT().isVector()) {
7041 switch (VA.getValVT().SimpleTy) {
7042 default:
7043 report_fatal_error("Unhandled value type for argument.");
7044 case MVT::v16i8:
7045 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7046 break;
7047 case MVT::v8i16:
7048 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7049 break;
7050 case MVT::v4i32:
7051 case MVT::v2i64:
7052 case MVT::v1i128:
7053 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7054 break;
7055 case MVT::v4f32:
7056 case MVT::v2f64:
7057 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7058 break;
7059 }
7060 }
7061 }
7062
7063 if (Flags.isByVal() && VA.isMemLoc()) {
7064 const unsigned Size =
7065 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7066 PtrByteSize);
7067 const int FI = MF.getFrameInfo().CreateFixedObject(
7068 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7069 /* IsAliased */ true);
7070 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7071 InVals.push_back(FIN);
7072
7073 continue;
7074 }
7075
7076 if (Flags.isByVal()) {
7077 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7078
7079 const MCPhysReg ArgReg = VA.getLocReg();
7080 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7081
7082 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7083 report_fatal_error("Over aligned byvals not supported yet.");
7084
7085 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7086 const int FI = MF.getFrameInfo().CreateFixedObject(
7087 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7088 /* IsAliased */ true);
7089 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7090 InVals.push_back(FIN);
7091
7092 // Add live ins for all the RegLocs for the same ByVal.
7093 const TargetRegisterClass *RegClass =
7094 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7095
7096 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7097 unsigned Offset) {
7098 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7099 // Since the caller's side has left-justified the aggregate in the
7100 // register, we can simply store the entire register into the stack
7101 // slot.
7102 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7103 // The store to the fixedstack object is needed because accessing a
7104 // field of the ByVal will use a gep and load. Ideally we will optimize
7105 // to extracting the value from the register directly, and elide the
7106 // stores when the argument's address is not taken, but that will need to
7107 // be future work.
7108 SDValue Store = DAG.getStore(
7109 CopyFrom.getValue(1), dl, CopyFrom,
7110 DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7111 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7112
7113 MemOps.push_back(Store);
7114 };
7115
7116 unsigned Offset = 0;
7117 HandleRegLoc(VA.getLocReg(), Offset);
7118 Offset += PtrByteSize;
7119 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7120 Offset += PtrByteSize) {
7121 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7122 "RegLocs should be for ByVal argument.");
7123
7124 const CCValAssign RL = ArgLocs[I++];
7125 HandleRegLoc(RL.getLocReg(), Offset);
7127 }
7128
7129 if (Offset != StackSize) {
7130 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7131 "Expected MemLoc for remaining bytes.");
7132 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7133 // Consume the MemLoc. The InVal has already been emitted, so nothing
7134 // more needs to be done.
7135 ++I;
7136 }
7137
7138 continue;
7139 }
7140
7141 if (VA.isRegLoc() && !VA.needsCustom()) {
7142 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7143 Register VReg =
7144 MF.addLiveIn(VA.getLocReg(),
7145 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7146 Subtarget.hasVSX()));
7147 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7148 if (ValVT.isScalarInteger() &&
7149 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7150 ArgValue =
7151 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7152 }
7153 InVals.push_back(ArgValue);
7154 continue;
7155 }
7156 if (VA.isMemLoc()) {
7157 HandleMemLoc();
7158 continue;
7159 }
7160 }
7161
7162 // On AIX a minimum of 8 words is saved to the parameter save area.
7163 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7164 // Area that is at least reserved in the caller of this function.
7165 unsigned CallerReservedArea =
7166 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7167
7168 // Set the size that is at least reserved in caller of this function. Tail
7169 // call optimized function's reserved stack space needs to be aligned so
7170 // that taking the difference between two stack areas will result in an
7171 // aligned stack.
7172 CallerReservedArea =
7173 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7174 FuncInfo->setMinReservedArea(CallerReservedArea);
7175
7176 if (isVarArg) {
7177 FuncInfo->setVarArgsFrameIndex(
7178 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7179 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7180
7181 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7182 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7183
7184 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7185 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7186 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7187
7188 // The fixed integer arguments of a variadic function are stored to the
7189 // VarArgsFrameIndex on the stack so that they may be loaded by
7190 // dereferencing the result of va_next.
7191 for (unsigned GPRIndex =
7192 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7193 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7194
7195 const Register VReg =
7196 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7197 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7198
7199 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7200 SDValue Store =
7201 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7202 MemOps.push_back(Store);
7203 // Increment the address for the next argument to store.
7204 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7205 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7206 }
7207 }
7208
7209 if (!MemOps.empty())
7210 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7211
7212 return Chain;
7213}
7214
7215SDValue PPCTargetLowering::LowerCall_AIX(
7216 SDValue Chain, SDValue Callee, CallFlags CFlags,
7217 const SmallVectorImpl<ISD::OutputArg> &Outs,
7218 const SmallVectorImpl<SDValue> &OutVals,
7219 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7220 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7221 const CallBase *CB) const {
7222 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7223 // AIX ABI stack frame layout.
7224
7225 assert((CFlags.CallConv == CallingConv::C ||
7226 CFlags.CallConv == CallingConv::Cold ||
7227 CFlags.CallConv == CallingConv::Fast) &&
7228 "Unexpected calling convention!");
7229
7230 if (CFlags.IsPatchPoint)
7231 report_fatal_error("This call type is unimplemented on AIX.");
7232
7233 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7234
7235 MachineFunction &MF = DAG.getMachineFunction();
7236 SmallVector<CCValAssign, 16> ArgLocs;
7237 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7238 *DAG.getContext());
7239
7240 // Reserve space for the linkage save area (LSA) on the stack.
7241 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7242 // [SP][CR][LR][2 x reserved][TOC].
7243 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7244 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7245 const bool IsPPC64 = Subtarget.isPPC64();
7246 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7247 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7248 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7249 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7250
7251 // The prolog code of the callee may store up to 8 GPR argument registers to
7252 // the stack, allowing va_start to index over them in memory if the callee
7253 // is variadic.
7254 // Because we cannot tell if this is needed on the caller side, we have to
7255 // conservatively assume that it is needed. As such, make sure we have at
7256 // least enough stack space for the caller to store the 8 GPRs.
7257 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7258 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7259 CCInfo.getNextStackOffset());
7260
7261 // Adjust the stack pointer for the new arguments...
7262 // These operations are automatically eliminated by the prolog/epilog pass.
7263 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7264 SDValue CallSeqStart = Chain;
7265
7267 SmallVector<SDValue, 8> MemOpChains;
7268
7269 // Set up a copy of the stack pointer for loading and storing any
7270 // arguments that may not fit in the registers available for argument
7271 // passing.
7272 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7273 : DAG.getRegister(PPC::R1, MVT::i32);
7274
7275 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7276 const unsigned ValNo = ArgLocs[I].getValNo();
7277 SDValue Arg = OutVals[ValNo];
7278 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7279
7280 if (Flags.isByVal()) {
7281 const unsigned ByValSize = Flags.getByValSize();
7282
7283 // Nothing to do for zero-sized ByVals on the caller side.
7284 if (!ByValSize) {
7285 ++I;
7286 continue;
7287 }
7288
7289 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7290 return DAG.getExtLoad(
7291 ISD::ZEXTLOAD, dl, PtrVT, Chain,
7292 (LoadOffset != 0)
7293 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7294 : Arg,
7295 MachinePointerInfo(), VT);
7296 };
7297
7298 unsigned LoadOffset = 0;
7299
7300 // Initialize registers, which are fully occupied by the by-val argument.
7301 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7302 SDValue Load = GetLoad(PtrVT, LoadOffset);
7303 MemOpChains.push_back(Load.getValue(1));
7304 LoadOffset += PtrByteSize;
7305 const CCValAssign &ByValVA = ArgLocs[I++];
7306 assert(ByValVA.getValNo() == ValNo &&
7307 "Unexpected location for pass-by-value argument.");
7308 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7309 }
7310
7311 if (LoadOffset == ByValSize)
7312 continue;
7313
7314 // There must be one more loc to handle the remainder.
7315 assert(ArgLocs[I].getValNo() == ValNo &&
7316 "Expected additional location for by-value argument.");
7317
7318 if (ArgLocs[I].isMemLoc()) {
7319 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7320 const CCValAssign &ByValVA = ArgLocs[I++];
7321 ISD::ArgFlagsTy MemcpyFlags = Flags;
7322 // Only memcpy the bytes that don't pass in register.
7323 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7324 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7325 (LoadOffset != 0)
7326 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7327 : Arg,
7328 DAG.getObjectPtrOffset(dl, StackPtr,
7329 TypeSize::Fixed(ByValVA.getLocMemOffset())),
7330 CallSeqStart, MemcpyFlags, DAG, dl);
7331 continue;
7332 }
7333
7334 // Initialize the final register residue.
7335 // Any residue that occupies the final by-val arg register must be
7336 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7337 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7338 // 2 and 1 byte loads.
7339 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7340 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7341 "Unexpected register residue for by-value argument.");
7342 SDValue ResidueVal;
7343 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7344 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7345 const MVT VT =
7346 N == 1 ? MVT::i8
7347 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7348 SDValue Load = GetLoad(VT, LoadOffset);
7349 MemOpChains.push_back(Load.getValue(1));
7350 LoadOffset += N;
7351 Bytes += N;
7352
7353 // By-val arguments are passed left-justified in registers.
7354 // Every load here needs to be shifted, otherwise a full register load
7355 // should have been used.
7356 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7357 "Unexpected load emitted during handling of pass-by-value "
7358 "argument.");
7359 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7360 EVT ShiftAmountTy =
7361 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7362 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7363 SDValue ShiftedLoad =
7364 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7365 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7366 ShiftedLoad)
7367 : ShiftedLoad;
7368 }
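// Editorial trace (not in the original source): for a 7-byte residue on
// PPC64 the loads are i32, i16 and i8, shifted left by 32, 16 and 8 bits
// respectively, then OR'd together into the left-justified register value.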
7369
7370 const CCValAssign &ByValVA = ArgLocs[I++];
7371 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7372 continue;
7373 }
7374
7375 CCValAssign &VA = ArgLocs[I++];
7376 const MVT LocVT = VA.getLocVT();
7377 const MVT ValVT = VA.getValVT();
7378
7379 switch (VA.getLocInfo()) {
7380 default:
7381 report_fatal_error("Unexpected argument extension type.");
7382 case CCValAssign::Full:
7383 break;
7384 case CCValAssign::ZExt:
7385 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7386 break;
7387 case CCValAssign::SExt:
7388 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7389 break;
7390 }
7391
7392 if (VA.isRegLoc() && !VA.needsCustom()) {
7393 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7394 continue;
7395 }
7396
7397 // Vector arguments passed to VarArg functions need custom handling when
7398 // they are passed (at least partially) in GPRs.
7399 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7400 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7401 // Store value to its stack slot.
7402 SDValue PtrOff =
7403 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7404 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7405 SDValue Store =
7406 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7407 MemOpChains.push_back(Store);
7408 const unsigned OriginalValNo = VA.getValNo();
7409 // Then load the GPRs from the stack
7410 unsigned LoadOffset = 0;
7411 auto HandleCustomVecRegLoc = [&]() {
7412 assert(I != E && "Unexpected end of CCvalAssigns.");
7413 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7414 "Expected custom RegLoc.");
7415 CCValAssign RegVA = ArgLocs[I++];
7416 assert(RegVA.getValNo() == OriginalValNo &&
7417 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7418 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7419 DAG.getConstant(LoadOffset, dl, PtrVT));
7420 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7421 MemOpChains.push_back(Load.getValue(1));
7422 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7423 LoadOffset += PtrByteSize;
7424 };
7425
7426 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7427 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7428 // R10.
7429 HandleCustomVecRegLoc();
7430 HandleCustomVecRegLoc();
7431
7432 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7433 ArgLocs[I].getValNo() == OriginalValNo) {
7434 assert(!IsPPC64 &&
7435 "Only 2 custom RegLocs expected for 64-bit codegen.");
7436 HandleCustomVecRegLoc();
7437 HandleCustomVecRegLoc();
7438 }
7439
7440 continue;
7441 }
7442
7443 if (VA.isMemLoc()) {
7444 SDValue PtrOff =
7445 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7446 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7447 MemOpChains.push_back(
7448 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7449
7450 continue;
7451 }
7452
7453 if (!ValVT.isFloatingPoint())
7454 report_fatal_error(
7455 "Unexpected register handling for calling convention.");
7456
7457 // Custom handling is used for GPR initializations for vararg float
7458 // arguments.
7459 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7460 LocVT.isInteger() &&
7461 "Custom register handling only expected for VarArg.");
7462
7463 SDValue ArgAsInt =
7464 DAG.getBitcast(MVT::getIntegerVT(LocVT.getSizeInBits()), Arg);
7465
7466 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7467 // f32 in 32-bit GPR
7468 // f64 in 64-bit GPR
7469 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7470 else if (Arg.getValueType().getFixedSizeInBits() <
7471 LocVT.getFixedSizeInBits())
7472 // f32 in 64-bit GPR.
7473 RegsToPass.push_back(std::make_pair(
7474 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7475 else {
7476 // f64 in two 32-bit GPRs
7477 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7478 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7479 "Unexpected custom register for argument!");
7480 CCValAssign &GPR1 = VA;
7481 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7482 DAG.getConstant(32, dl, MVT::i8));
7483 RegsToPass.push_back(std::make_pair(
7484 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7485
7486 if (I != E) {
7487 // If only 1 GPR was available, there will only be one custom GPR and
7488 // the argument will also pass in memory.
7489 CCValAssign &PeekArg = ArgLocs[I];
7490 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7491 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7492 CCValAssign &GPR2 = ArgLocs[I++];
7493 RegsToPass.push_back(std::make_pair(
7494 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7495 }
7496 }
7497 }
7498 }
7499
7500 if (!MemOpChains.empty())
7501 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7502
7503 // For indirect calls, we need to save the TOC base to the stack for
7504 // restoration after the call.
7505 if (CFlags.IsIndirect) {
7506 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7507 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7508 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7509 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7510 const unsigned TOCSaveOffset =
7511 Subtarget.getFrameLowering()->getTOCSaveOffset();
7512
7513 setUsesTOCBasePtr(DAG);
7514 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7515 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7516 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7517 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7518 Chain = DAG.getStore(
7519 Val.getValue(1), dl, Val, AddPtr,
7520 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7521 }
7522
7523 // Build a sequence of copy-to-reg nodes chained together with token chain
7524 // and flag operands which copy the outgoing args into the appropriate regs.
7525 SDValue InFlag;
7526 for (auto Reg : RegsToPass) {
7527 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7528 InFlag = Chain.getValue(1);
7529 }
7530
7531 const int SPDiff = 0;
7532 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7533 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7534}
7535
7536bool
7537PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7538 MachineFunction &MF, bool isVarArg,
7539 const SmallVectorImpl<ISD::OutputArg> &Outs,
7540 LLVMContext &Context) const {
7541 SmallVector<CCValAssign, 16> RVLocs;
7542 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7543 return CCInfo.CheckReturn(
7544 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7545 ? RetCC_PPC_Cold
7546 : RetCC_PPC);
7547}
7548
7549SDValue
7550PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7551 bool isVarArg,
7552 const SmallVectorImpl<ISD::OutputArg> &Outs,
7553 const SmallVectorImpl<SDValue> &OutVals,
7554 const SDLoc &dl, SelectionDAG &DAG) const {
7555 SmallVector<CCValAssign, 16> RVLocs;
7556 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7557 *DAG.getContext());
7558 CCInfo.AnalyzeReturn(Outs,
7559 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7560 ? RetCC_PPC_Cold
7561 : RetCC_PPC);
7562
7563 SDValue Flag;
7564 SmallVector<SDValue, 4> RetOps(1, Chain);
7565
7566 // Copy the result values into the output registers.
7567 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7568 CCValAssign &VA = RVLocs[i];
7569 assert(VA.isRegLoc() && "Can only return in registers!");
7570
7571 SDValue Arg = OutVals[RealResIdx];
7572
7573 switch (VA.getLocInfo()) {
7574 default: llvm_unreachable("Unknown loc info!");
7575 case CCValAssign::Full: break;
7576 case CCValAssign::AExt:
7577 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7578 break;
7579 case CCValAssign::ZExt:
7580 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7581 break;
7582 case CCValAssign::SExt:
7583 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7584 break;
7585 }
7586 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7587 bool isLittleEndian = Subtarget.isLittleEndian();
7588 // Legalize ret f64 -> ret 2 x i32.
7589 SDValue SVal =
7590 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7591 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7592 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7593 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7594 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7595 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7596 Flag = Chain.getValue(1);
7597 VA = RVLocs[++i]; // skip ahead to next loc
7598 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7599 } else
7600 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7601 Flag = Chain.getValue(1);
7602 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7603 }
7604
7605 RetOps[0] = Chain; // Update chain.
7606
7607 // Add the flag if we have it.
7608 if (Flag.getNode())
7609 RetOps.push_back(Flag);
7610
7611 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7612}
7613
7614SDValue
7615PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7616 SelectionDAG &DAG) const {
7617 SDLoc dl(Op);
7618
7619 // Get the correct type for integers.
7620 EVT IntVT = Op.getValueType();
7621
7622 // Get the inputs.
7623 SDValue Chain = Op.getOperand(0);
7624 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7625 // Build a DYNAREAOFFSET node.
7626 SDValue Ops[2] = {Chain, FPSIdx};
7627 SDVTList VTs = DAG.getVTList(IntVT);
7628 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7629}
7630
7631SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7632 SelectionDAG &DAG) const {
7633 // When we pop the dynamic allocation we need to restore the SP link.
7634 SDLoc dl(Op);
7635
7636 // Get the correct type for pointers.
7637 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7638
7639 // Construct the stack pointer operand.
7640 bool isPPC64 = Subtarget.isPPC64();
7641 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7642 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7643
7644 // Get the operands for the STACKRESTORE.
7645 SDValue Chain = Op.getOperand(0);
7646 SDValue SaveSP = Op.getOperand(1);
7647
7648 // Load the old link SP.
7649 SDValue LoadLinkSP =
7650 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7651
7652 // Restore the stack pointer.
7653 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7654
7655 // Store the old link SP.
7656 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7657}
7658
7659 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7660 MachineFunction &MF = DAG.getMachineFunction();
7661 bool isPPC64 = Subtarget.isPPC64();
7662 EVT PtrVT = getPointerTy(MF.getDataLayout());
7663
7664 // Get the current return address save index. The users of this index
7665 // will be primarily the RETURNADDR lowering.
7666 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7667 int RASI = FI->getReturnAddrSaveIndex();
7668
7669 // If the return address save index hasn't been defined yet.
7670 if (!RASI) {
7671 // Find out the fixed offset of the return address save area.
7672 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7673 // Allocate the frame index for the return address save area.
7674 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7675 // Save the result.
7676 FI->setReturnAddrSaveIndex(RASI);
7677 }
7678 return DAG.getFrameIndex(RASI, PtrVT);
7679}
7680
7681SDValue
7682 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
7683 MachineFunction &MF = DAG.getMachineFunction();
7684 bool isPPC64 = Subtarget.isPPC64();
7685 EVT PtrVT = getPointerTy(MF.getDataLayout());
7686
7687 // Get current frame pointer save index. The users of this index will be
7688 // primarily DYNALLOC instructions.
7689 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7690 int FPSI = FI->getFramePointerSaveIndex();
7691
7692 // If the frame pointer save index hasn't been defined yet.
7693 if (!FPSI) {
7695 // Find out the fixed offset of the frame pointer save area.
7695 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7696 // Allocate the frame index for frame pointer save area.
7697 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7698 // Save the result.
7699 FI->setFramePointerSaveIndex(FPSI);
7700 }
7701 return DAG.getFrameIndex(FPSI, PtrVT);
7702}
7703
7704SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7705 SelectionDAG &DAG) const {
7706 MachineFunction &MF = DAG.getMachineFunction();
7707 // Get the inputs.
7708 SDValue Chain = Op.getOperand(0);
7709 SDValue Size = Op.getOperand(1);
7710 SDLoc dl(Op);
7711
7712 // Get the correct type for pointers.
7713 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7714 // Negate the size.
7715 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7716 DAG.getConstant(0, dl, PtrVT), Size);
7717 // Construct a node for the frame pointer save index.
7718 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7719 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7720 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7721 if (hasInlineStackProbe(MF))
7722 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7723 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7724}
7725
7726SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7727 SelectionDAG &DAG) const {
7728 MachineFunction &MF = DAG.getMachineFunction();
7729
7730 bool isPPC64 = Subtarget.isPPC64();
7731 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7732
7733 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7734 return DAG.getFrameIndex(FI, PtrVT);
7735}
7736
7737SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7738 SelectionDAG &DAG) const {
7739 SDLoc DL(Op);
7740 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7741 DAG.getVTList(MVT::i32, MVT::Other),
7742 Op.getOperand(0), Op.getOperand(1));
7743}
7744
7745SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7746 SelectionDAG &DAG) const {
7747 SDLoc DL(Op);
7748 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7749 Op.getOperand(0), Op.getOperand(1));
7750}
7751
7752SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7753 if (Op.getValueType().isVector())
7754 return LowerVectorLoad(Op, DAG);
7755
7756 assert(Op.getValueType() == MVT::i1 &&
7757 "Custom lowering only for i1 loads");
7758
7759 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7760
7761 SDLoc dl(Op);
7762 LoadSDNode *LD = cast<LoadSDNode>(Op);
7763
7764 SDValue Chain = LD->getChain();
7765 SDValue BasePtr = LD->getBasePtr();
7766 MachineMemOperand *MMO = LD->getMemOperand();
7767
7768 SDValue NewLD =
7769 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7770 BasePtr, MVT::i8, MMO);
7771 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7772
7773 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7774 return DAG.getMergeValues(Ops, dl);
7775}
7776
7777SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7778 if (Op.getOperand(1).getValueType().isVector())
7779 return LowerVectorStore(Op, DAG);
7780
7781 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7782 "Custom lowering only for i1 stores");
7783
7784 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7785
7786 SDLoc dl(Op);
7787 StoreSDNode *ST = cast<StoreSDNode>(Op);
7788
7789 SDValue Chain = ST->getChain();
7790 SDValue BasePtr = ST->getBasePtr();
7791 SDValue Value = ST->getValue();
7792 MachineMemOperand *MMO = ST->getMemOperand();
7793
7794 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7795 Value);
7796 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7797}
7798
7799// FIXME: Remove this once the ANDI glue bug is fixed:
7800SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7801 assert(Op.getValueType() == MVT::i1 &&
7802 "Custom lowering only for i1 results");
7803
7804 SDLoc DL(Op);
7805 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7806}
7807
7808SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7809 SelectionDAG &DAG) const {
7810
7811 // Implements a vector truncate that fits in a vector register as a shuffle.
7812 // We want to legalize vector truncates down to where the source fits in
7813 // a vector register (and target is therefore smaller than vector register
7814 // size). At that point legalization will try to custom lower the sub-legal
7815 // result and get here - where we can contain the truncate as a single target
7816 // operation.
7817
7818 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7819 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7820 //
7821 // We will implement it for big-endian ordering as this (where u denotes
7822 // undefined):
7823 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7824 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7825 //
7826 // The same operation in little-endian ordering will be:
7827 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7828 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7829
7830 EVT TrgVT = Op.getValueType();
7831 assert(TrgVT.isVector() && "Vector type expected.");
7832 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7833 EVT EltVT = TrgVT.getVectorElementType();
7834 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7835 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7836 !isPowerOf2_32(EltVT.getSizeInBits()))
7837 return SDValue();
7838
7839 SDValue N1 = Op.getOperand(0);
7840 EVT SrcVT = N1.getValueType();
7841 unsigned SrcSize = SrcVT.getSizeInBits();
7842 if (SrcSize > 256 ||
7843 !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7844 !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7845 return SDValue();
7846 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7847 return SDValue();
7848
7849 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7850 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7851
7852 SDLoc DL(Op);
7853 SDValue Op1, Op2;
7854 if (SrcSize == 256) {
7855 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7856 EVT SplitVT =
7857 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
7858 unsigned SplitNumElts = SplitVT.getVectorNumElements();
7859 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7860 DAG.getConstant(0, DL, VecIdxTy));
7861 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7862 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7863 }
7864 else {
7865 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7866 Op2 = DAG.getUNDEF(WideVT);
7867 }
7868
7869 // First list the elements we want to keep.
7870 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7871 SmallVector<int, 16> ShuffV;
7872 if (Subtarget.isLittleEndian())
7873 for (unsigned i = 0; i < TrgNumElts; ++i)
7874 ShuffV.push_back(i * SizeMult);
7875 else
7876 for (unsigned i = 1; i <= TrgNumElts; ++i)
7877 ShuffV.push_back(i * SizeMult - 1);
7878
7879 // Populate the remaining elements with undefs.
7880 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7881 // ShuffV.push_back(i + WideNumElts);
7882 ShuffV.push_back(WideNumElts + 1);
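// Editorial note (not in the original source): any shuffle index >=
// WideNumElts selects from Op2, which is undef here, so the exact filler
// value is immaterial.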
7883
7884 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7885 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7886 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7887}
7888
7889 /// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
7890 /// when possible.
7891SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7892 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7893 EVT ResVT = Op.getValueType();
7894 EVT CmpVT = Op.getOperand(0).getValueType();
7895 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7896 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7897 SDLoc dl(Op);
7898
7899 // Without power9-vector, we don't have a native instruction for f128
7900 // comparison. The following transformation to a libcall is needed for setcc:
7901 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
7902 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
7903 SDValue Z = DAG.getSetCC(
7904 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
7905 LHS, RHS, CC);
7906 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
7907 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
7908 }
7909
7910 // Not FP, or using SPE? Not a fsel.
7911 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
7912 Subtarget.hasSPE())
7913 return Op;
7914
7915 SDNodeFlags Flags = Op.getNode()->getFlags();
7916
7917 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
7918 // presence of infinities.
7919 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7920 switch (CC) {
7921 default:
7922 break;
7923 case ISD::SETOGT:
7924 case ISD::SETGT:
7925 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
7926 case ISD::SETOLT:
7927 case ISD::SETLT:
7928 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
7929 }
7930 }
7931
7932 // We might be able to do better than this under some circumstances, but in
7933 // general, fsel-based lowering of select is a finite-math-only optimization.
7934 // For more information, see section F.3 of the 2.06 ISA specification.
7935 // With ISA 3.0, the xsmaxc/xsminc path above covers the infinity-safe cases.
7936 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7937 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7938 return Op;
7939
7940 // If the RHS of the comparison is a 0.0, we don't need to do the
7941 // subtraction at all.
7942 SDValue Sel1;
7943 if (isFloatingPointZero(RHS))
7944 switch (CC) {
7945 default: break; // SETUO etc aren't handled by fsel.
7946 case ISD::SETNE:
7947 std::swap(TV, FV);
7948 LLVM_FALLTHROUGH;
7949 case ISD::SETEQ:
7950 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7951 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7952 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7953 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7954 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7955 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7956 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7957 case ISD::SETULT:
7958 case ISD::SETLT:
7959 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7960 LLVM_FALLTHROUGH;
7961 case ISD::SETOGE:
7962 case ISD::SETGE:
7963 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7964 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7965 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7966 case ISD::SETUGT:
7967 case ISD::SETGT:
7968 std::swap(TV, FV); // fsel is natively setge, swap operands for setle
7969 LLVM_FALLTHROUGH;
7970 case ISD::SETOLE:
7971 case ISD::SETLE:
7972 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7973 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7974 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7975 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7976 }
7977
7978 SDValue Cmp;
7979 switch (CC) {
7980 default: break; // SETUO etc aren't handled by fsel.
7981 case ISD::SETNE:
7982 std::swap(TV, FV);
7983 LLVM_FALLTHROUGH;
7984 case ISD::SETEQ:
7985 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7986 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7987 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7988 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7989 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7990 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7991 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7992 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7993 case ISD::SETULT:
7994 case ISD::SETLT:
7995 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7996 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7997 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7998 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7999 case ISD::SETOGE:
8000 case ISD::SETGE:
8001 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8002 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8003 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8004 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8005 case ISD::SETUGT:
8006 case ISD::SETGT:
8007 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8008 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8009 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8010 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8011 case ISD::SETOLE:
8012 case ISD::SETLE:
8013 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8014 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8015 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8016 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8017 }
8018 return Op;
8019}
8020
8021static unsigned getPPCStrictOpcode(unsigned Opc) {
8022 switch (Opc) {
8023 default:
8024 llvm_unreachable("No strict version of this opcode!");
8025 case PPCISD::FCTIDZ:
8026 return PPCISD::STRICT_FCTIDZ;
8027 case PPCISD::FCTIWZ:
8028 return PPCISD::STRICT_FCTIWZ;
8029 case PPCISD::FCTIDUZ:
8030 return PPCISD::STRICT_FCTIDUZ;
8031 case PPCISD::FCTIWUZ:
8032 return PPCISD::STRICT_FCTIWUZ;
8033 case PPCISD::FCFID:
8034 return PPCISD::STRICT_FCFID;
8035 case PPCISD::FCFIDU:
8036 return PPCISD::STRICT_FCFIDU;
8037 case PPCISD::FCFIDS:
8038 return PPCISD::STRICT_FCFIDS;
8039 case PPCISD::FCFIDUS:
8040 return PPCISD::STRICT_FCFIDUS;
8041 }
8042}
8043
8043
8044 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8045 const PPCSubtarget &Subtarget) {
8046 SDLoc dl(Op);
8047 bool IsStrict = Op->isStrictFPOpcode();
8048 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8049 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8050
8051 // TODO: Any other flags to propagate?
8052 SDNodeFlags Flags;
8053 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8054
8055 // For strict nodes, source is the second operand.
8056 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8057 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8058 assert(Src.getValueType().isFloatingPoint());
8059 if (Src.getValueType() == MVT::f32) {
8060 if (IsStrict) {
8061 Src =
8062 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8063 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8064 Chain = Src.getValue(1);
8065 } else
8066 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8067 }
8068 SDValue Conv;
8069 unsigned Opc = ISD::DELETED_NODE;
8070 switch (Op.getSimpleValueType().SimpleTy) {
8071 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8072 case MVT::i32:
8073 Opc = IsSigned ? PPCISD::FCTIWZ
8074 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8075 break;
8076 case MVT::i64:
8077 assert((IsSigned || Subtarget.hasFPCVT()) &&
8078 "i64 FP_TO_UINT is supported only with FPCVT");
8079 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8080 }
8081 if (IsStrict) {
8082 Opc = getPPCStrictOpcode(Opc);
8083 Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8084 {Chain, Src}, Flags);
8085 } else {
8086 Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8087 }
8088 return Conv;
8089}
8090
8091void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8092 SelectionDAG &DAG,
8093 const SDLoc &dl) const {
8094 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8095 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8096 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8097 bool IsStrict = Op->isStrictFPOpcode();
8098
8099 // Convert the FP value to an int value through memory.
8100 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8101 (IsSigned || Subtarget.hasFPCVT());
8102 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8103 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8104 MachinePointerInfo MPI =
8105 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8106
8107 // Emit a store to the stack slot.
8108 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8109 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8110 if (i32Stack) {
8111 MachineFunction &MF = DAG.getMachineFunction();
8112 Alignment = Align(4);
8113 MachineMemOperand *MMO =
8114 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8114 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8115 SDValue Ops[] = { Chain, Tmp, FIPtr };
8116 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8117 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8118 } else
8119 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8120
8121 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8122 // add in a bias on big endian.
8123 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8124 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8125 DAG.getConstant(4, dl, FIPtr.getValueType()));
8126 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8127 }
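// Editorial note (not in the original source): the f64 store above writes 8
// bytes; on big-endian targets the low-order i32 result lives in the
// high-addressed word, hence the 4-byte bias when loading it back.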
8128
8129 RLI.Chain = Chain;
8130 RLI.Ptr = FIPtr;
8131 RLI.MPI = MPI;
8132 RLI.Alignment = Alignment;
8133}
8134
8135/// Custom lowers floating point to integer conversions to use
8136/// the direct move instructions available in ISA 2.07 to avoid the
8137/// need for load/store combinations.
8138SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8139 SelectionDAG &DAG,
8140 const SDLoc &dl) const {
8141 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8142 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8143 if (Op->isStrictFPOpcode())
8144 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8145 else
8146 return Mov;
8147}
8148
8149SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8150 const SDLoc &dl) const {
8151 bool IsStrict = Op->isStrictFPOpcode();
8152 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8153 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8154 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8155 EVT SrcVT = Src.getValueType();
8156 EVT DstVT = Op.getValueType();
8157
8158 // FP to INT conversions are legal for f128.
8159 if (SrcVT == MVT::f128)
8160 return Subtarget.hasP9Vector() ? Op : SDValue();
8161
8162 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8163 // PPC (the libcall is not available).
8164 if (SrcVT == MVT::ppcf128) {
8165 if (DstVT == MVT::i32) {
8166 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8167 // set other fast-math flags to FP operations in both strict and
8168 // non-strict cases. (FP_TO_SINT, FSUB)
8169 SDNodeFlags Flags;
8170 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8171
8172 if (IsSigned) {
8173 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8174 DAG.getIntPtrConstant(0, dl));
8175 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8176 DAG.getIntPtrConstant(1, dl));
8177
8178 // Add the two halves of the long double in round-to-zero mode, and use
8179 // a smaller FP_TO_SINT.
8180 if (IsStrict) {
8181 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8182 DAG.getVTList(MVT::f64, MVT::Other),
8183 {Op.getOperand(0), Lo, Hi}, Flags);
8184 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8185 DAG.getVTList(MVT::i32, MVT::Other),
8186 {Res.getValue(1), Res}, Flags);
8187 } else {
8188 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8189 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8190 }
8191 } else {
8192 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8193 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8194 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8195 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8196 if (IsStrict) {
8197 // Sel = Src < 0x80000000
8198 // FltOfs = select Sel, 0.0, 0x80000000
8199 // IntOfs = select Sel, 0, 0x80000000
8200 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8201 SDValue Chain = Op.getOperand(0);
8202 EVT SetCCVT =
8203 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8204 EVT DstSetCCVT =
8205 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8206 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8207 Chain, true);
8208 Chain = Sel.getValue(1);
8209
8210 SDValue FltOfs = DAG.getSelect(
8211 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8212 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8213
8214 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8215 DAG.getVTList(SrcVT, MVT::Other),
8216 {Chain, Src, FltOfs}, Flags);
8217 Chain = Val.getValue(1);
8218 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8219 DAG.getVTList(DstVT, MVT::Other),
8220 {Chain, Val}, Flags);
8221 Chain = SInt.getValue(1);
8222 SDValue IntOfs = DAG.getSelect(
8223 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8224 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8225 return DAG.getMergeValues({Result, Chain}, dl);
8226 } else {
8227 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8228 // FIXME: generated code sucks.
8229 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8230 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8231 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8232 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8233 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8234 }
8235 }
8236 }
8237
8238 return SDValue();
8239 }
8240
8241 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8242 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8243
8244 ReuseLoadInfo RLI;
8245 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8246
8247 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8248 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8249}
8250
8251// We're trying to insert a regular store, S, and then a load, L. If the
8252// incoming value, O, is a load, we might just be able to have our load use the
8253// address used by O. However, we don't know if anything else will store to
8254// that address before we can load from it. To prevent this situation, we need
8255// to insert our load, L, into the chain as a peer of O. To do this, we give L
8256// the same chain operand as O, we create a token factor from the chain results
8257// of O and L, and we replace all uses of O's chain result with that token
8258// factor (see spliceIntoChain below for this last part).
8259bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8260 ReuseLoadInfo &RLI,
8261 SelectionDAG &DAG,
8262 ISD::LoadExtType ET) const {
8263 // Conservatively skip reusing for constrained FP nodes.
8264 if (Op->isStrictFPOpcode())
8265 return false;
8266
8267 SDLoc dl(Op);
8268 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8269 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8270 if (ET == ISD::NON_EXTLOAD &&
8271 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8272 isOperationLegalOrCustom(Op.getOpcode(),
8273 Op.getOperand(0).getValueType())) {
8274
8275 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8276 return true;
8277 }
8278
8279 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8280 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8281 LD->isNonTemporal())
8282 return false;
8283 if (LD->getMemoryVT() != MemVT)
8284 return false;
8285
8286 // If the result of the load is an illegal type, then we can't build a
8287 // valid chain for reuse since the legalised loads and token factor node that
8288 // ties the legalised loads together use a different output chain than the
8289 // illegal load.
8290 if (!isTypeLegal(LD->getValueType(0)))
8291 return false;
8292
8293 RLI.Ptr = LD->getBasePtr();
8294 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8295 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8296 "Non-pre-inc AM on PPC?");
8297 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8298 LD->getOffset());
8299 }
8300
8301 RLI.Chain = LD->getChain();
8302 RLI.MPI = LD->getPointerInfo();
8303 RLI.IsDereferenceable = LD->isDereferenceable();
8304 RLI.IsInvariant = LD->isInvariant();
8305 RLI.Alignment = LD->getAlign();
8306 RLI.AAInfo = LD->getAAInfo();
8307 RLI.Ranges = LD->getRanges();
8308
8309 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8310 return true;
8311}
8312
8313// Given the head of the old chain, ResChain, insert a token factor containing
8314// it and NewResChain, and make users of ResChain now be users of that token
8315// factor.
8316// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8317void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8318 SDValue NewResChain,
8319 SelectionDAG &DAG) const {
8320 if (!ResChain)
8321 return;
8322
8323 SDLoc dl(NewResChain);
8324
8325 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ResChain,
8326 NewResChain, DAG.getUNDEF(MVT::Other));
8327 assert(TF.getNode() != NewResChain.getNode() &&
8328 "A new TF really is required here");
8329
8330 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8331 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8332}
8333
8334 /// Analyze the profitability of a direct move.
8335 /// Prefer a float load over an int load plus a direct move
8336 /// when the only users of the loaded integer are int-to-fp conversions.
8337bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8338 SDNode *Origin = Op.getOperand(0).getNode();
8339 if (Origin->getOpcode() != ISD::LOAD)
8340 return true;
8341
8342 // If there is no LXSIBZX/LXSIHZX, like Power8,
8343 // prefer direct move if the memory size is 1 or 2 bytes.
8344 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8345 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8346 return true;
8347
8348 for (SDNode::use_iterator UI = Origin->use_begin(),
8349 UE = Origin->use_end();
8350 UI != UE; ++UI) {
8351
8352 // Only look at the users of the loaded value.
8353 if (UI.getUse().get().getResNo() != 0)
8354 continue;
8355
8356 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8357 UI->getOpcode() != ISD::UINT_TO_FP &&
8358 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8359 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8360 return true;
8361 }
8362
8363 return false;
8364}
8365
8366 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8367 const PPCSubtarget &Subtarget,
8368 SDValue Chain = SDValue()) {
8369 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8370 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8371 SDLoc dl(Op);
8372
8373 // TODO: Any other flags to propagate?
8374 SDNodeFlags Flags;
8375 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8376
8377 // If we have FCFIDS, then use it when converting to single-precision.
8378 // Otherwise, convert to double-precision and then round.
8379 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8380 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8381 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8382 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8383 if (Op->isStrictFPOpcode()) {
8384 if (!Chain)
8385 Chain = Op.getOperand(0);
8386 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8387 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8388 } else
8389 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8390}
8391
8392/// Custom lowers integer to floating point conversions to use
8393/// the direct move instructions available in ISA 2.07 to avoid the
8394/// need for load/store combinations.
8395SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8396 SelectionDAG &DAG,
8397 const SDLoc &dl) const {
8398 assert((Op.getValueType() == MVT::f32 ||
8399 Op.getValueType() == MVT::f64) &&
8400 "Invalid floating point type as target of conversion");
8401 assert(Subtarget.hasFPCVT() &&
8402 "Int to FP conversions with direct moves require FPCVT");
8403 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8404 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8405 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8406 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8407 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8408 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8409 return convertIntToFP(Op, Mov, DAG, Subtarget);
8410}
8411
8412static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8413
8414 EVT VecVT = Vec.getValueType();
8415 assert(VecVT.isVector() && "Expected a vector type.");
8416 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8417
8418 EVT EltVT = VecVT.getVectorElementType();
8419 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8420 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
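// For example, widening a v2i32 input: EltVT is i32, WideNumElts is
// 128 / 32 = 4, so NumConcat below is 2 and the result is a v4i32
// CONCAT_VECTORS of the input plus one undef v2i32.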
8421
8422 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8423 SmallVector<SDValue, 16> Ops(NumConcat);
8424 Ops[0] = Vec;
8425 SDValue UndefVec = DAG.getUNDEF(VecVT);
8426 for (unsigned i = 1; i < NumConcat; ++i)
8427 Ops[i] = UndefVec;
8428
8429 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8430}
8431
8432SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8433 const SDLoc &dl) const {
8434 bool IsStrict = Op->isStrictFPOpcode();
8435 unsigned Opc = Op.getOpcode();
8436 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8437 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8439 "Unexpected conversion type");
8440 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8441 "Supports conversions to v2f64/v4f32 only.");
8442
8443 // TODO: Any other flags to propagate?
8444 SDNodeFlags Flags;
8445 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8446
8447 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8448 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8449
8450 SDValue Wide = widenVec(DAG, Src, dl);
8451 EVT WideVT = Wide.getValueType();
8452 unsigned WideNumElts = WideVT.getVectorNumElements();
8453 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8454
8455 SmallVector<int, 16> ShuffV;
8456 for (unsigned i = 0; i < WideNumElts; ++i)
8457 ShuffV.push_back(i + WideNumElts);
8458
8459 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8460 int SaveElts = FourEltRes ? 4 : 2;
8461 if (Subtarget.isLittleEndian())
8462 for (int i = 0; i < SaveElts; i++)
8463 ShuffV[i * Stride] = i;
8464 else
8465 for (int i = 1; i <= SaveElts; i++)
8466 ShuffV[i * Stride - 1] = i - 1;
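// For example, an unsigned v2i32 source widened to v4i32 on LE has
// Stride == 2, so ShuffV becomes {0, 5, 1, 7}: each input word is
// interleaved with a word from the zero vector, making every i64 lane
// a zero-extended input element.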
8467
8468 SDValue ShuffleSrc2 =
8469 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8470 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8471
8472 SDValue Extend;
8473 if (SignedConv) {
8474 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8475 EVT ExtVT = Src.getValueType();
8476 if (Subtarget.hasP9Altivec())
8477 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8478 IntermediateVT.getVectorNumElements());
8479
8480 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8481 DAG.getValueType(ExtVT));
8482 } else
8483 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8484
8485 if (IsStrict)
8486 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8487 {Op.getOperand(0), Extend}, Flags);
8488
8489 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8490}
8491
8492SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8493 SelectionDAG &DAG) const {
8494 SDLoc dl(Op);
8495 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8496 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8497 bool IsStrict = Op->isStrictFPOpcode();
8498 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8499 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8500
8501 // TODO: Any other flags to propagate?
8502 SDNodeFlags Flags;
8503 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8504
8505 EVT InVT = Src.getValueType();
8506 EVT OutVT = Op.getValueType();
8507 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8508 isOperationCustom(Op.getOpcode(), InVT))
8509 return LowerINT_TO_FPVector(Op, DAG, dl);
8510
8511 // Conversions to f128 are legal.
8512 if (Op.getValueType() == MVT::f128)
8513 return Subtarget.hasP9Vector() ? Op : SDValue();
8514
8515 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8516 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8517 return SDValue();
8518
8519 if (Src.getValueType() == MVT::i1) {
8520 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8521 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8522 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8523 if (IsStrict)
8524 return DAG.getMergeValues({Sel, Chain}, dl);
8525 else
8526 return Sel;
8527 }
8528
8529 // If we have direct moves, we can do all the conversion, skip the store/load
8530 // however, without FPCVT we can't do most conversions.
8531 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8532 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8533 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8534
8535 assert((IsSigned || Subtarget.hasFPCVT()) &&
8536 "UINT_TO_FP is supported only with FPCVT");
8537
8538 if (Src.getValueType() == MVT::i64) {
8539 SDValue SINT = Src;
8540 // When converting to single-precision, we actually need to convert
8541 // to double-precision first and then round to single-precision.
8542 // To avoid double-rounding effects during that operation, we have
8543 // to prepare the input operand. Bits that might be truncated when
8544 // converting to double-precision are replaced by a bit that won't
8545 // be lost at this stage, but is below the single-precision rounding
8546 // position.
8547 //
8548 // However, if -enable-unsafe-fp-math is in effect, accept double
8549 // rounding to avoid the extra overhead.
8550 if (Op.getValueType() == MVT::f32 &&
8551 !Subtarget.hasFPCVT() &&
8552 DAG.getTarget().Options.UnsafeFPMath) {
8553
8554 // Twiddle input to make sure the low 11 bits are zero. (If this
8555 // is the case, we are guaranteed the value will fit into the 53 bit
8556 // mantissa of an IEEE double-precision value without rounding.)
8557 // If any of those low 11 bits were not zero originally, make sure
8558 // bit 12 (value 2048) is set instead, so that the final rounding
8559 // to single-precision gets the correct result.
8560 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8561 SINT, DAG.getConstant(2047, dl, MVT::i64));
8562 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8563 Round, DAG.getConstant(2047, dl, MVT::i64));
8564 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8565 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8566 Round, DAG.getConstant(-2048, dl, MVT::i64));
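// In effect: if any of the low 11 bits of SINT are set, low + 2047
// carries into the 2048 bit, so after the OR and the final mask we get
// SINT with bits 0-10 cleared and the 2048 bit forced on (a sticky
// bit); if they are all zero, SINT passes through unchanged.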
8567
8568 // However, we cannot use that value unconditionally: if the magnitude
8569 // of the input value is small, the bit-twiddling we did above might
8570 // end up visibly changing the output. Fortunately, in that case, we
8571 // don't need to twiddle bits since the original input will convert
8572 // exactly to double-precision floating-point already. Therefore,
8573 // construct a conditional to use the original value if the top 11
8574 // bits are all sign-bit copies, and use the rounded value computed
8575 // above otherwise.
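// The shift below keeps only the top 11 bits: if they are all copies
// of the sign bit, SRA yields 0 or -1, the ADD gives 0 or 1, and the
// unsigned "> 1" test fails, so the original SINT is selected.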
8576 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8577 SINT, DAG.getConstant(53, dl, MVT::i32));
8578 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8579 Cond, DAG.getConstant(1, dl, MVT::i64));
8580 Cond = DAG.getSetCC(
8581 dl,
8582 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8583 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8584
8585 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8586 }
8587
8588 ReuseLoadInfo RLI;
8589 SDValue Bits;
8590
8591 MachineFunction &MF = DAG.getMachineFunction();
8592 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8593 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8594 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8595 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8596 } else if (Subtarget.hasLFIWAX() &&
8597 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8598 MachineMemOperand *MMO =
8599 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8600 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8601 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8602 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8603 DAG.getVTList(MVT::f64, MVT::Other),
8604 Ops, MVT::i32, MMO);
8605 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8606 } else if (Subtarget.hasFPCVT() &&
8607 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8608 MachineMemOperand *MMO =
8609 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8610 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8611 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8612 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8613 DAG.getVTList(MVT::f64, MVT::Other),
8614 Ops, MVT::i32, MMO);
8615 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8616 } else if (((Subtarget.hasLFIWAX() &&
8617 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8618 (Subtarget.hasFPCVT() &&
8619 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8620 SINT.getOperand(0).getValueType() == MVT::i32) {
8621 MachineFrameInfo &MFI = MF.getFrameInfo();
8622 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8623
8624 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8625 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8626
8627 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8628 MachinePointerInfo::getFixedStack(
8629 DAG.getMachineFunction(), FrameIdx));
8630 Chain = Store;
8631
8632 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8633 "Expected an i32 store");
8634
8635 RLI.Ptr = FIdx;
8636 RLI.Chain = Chain;
8637 RLI.MPI =
8638 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8639 RLI.Alignment = Align(4);
8640
8641 MachineMemOperand *MMO =
8642 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8643 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8644 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8645 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8646 PPCISD::LFIWZX : PPCISD::LFIWAX,
8647 dl, DAG.getVTList(MVT::f64, MVT::Other),
8648 Ops, MVT::i32, MMO);
8649 Chain = Bits.getValue(1);
8650 } else
8651 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8652
8653 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8654 if (IsStrict)
8655 Chain = FP.getValue(1);
8656
8657 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8658 if (IsStrict)
8659 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8660 DAG.getVTList(MVT::f32, MVT::Other),
8661 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8662 else
8663 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8664 DAG.getIntPtrConstant(0, dl));
8665 }
8666 return FP;
8667 }
8668
8669 assert(Src.getValueType() == MVT::i32 &&
8670 "Unhandled INT_TO_FP type in custom expander!");
8671 // Since we only generate this in 64-bit mode, we can take advantage of
8672 // 64-bit registers. In particular, sign extend the input value into the
8673 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8674 // then lfd it and fcfid it.
8675 MachineFunction &MF = DAG.getMachineFunction();
8676 MachineFrameInfo &MFI = MF.getFrameInfo();
8677 EVT PtrVT = getPointerTy(MF.getDataLayout());
8678
8679 SDValue Ld;
8680 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8681 ReuseLoadInfo RLI;
8682 bool ReusingLoad;
8683 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8684 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8685 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8686
8687 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8688 MachinePointerInfo::getFixedStack(
8689 DAG.getMachineFunction(), FrameIdx));
8690 Chain = Store;
8691
8692 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8693 "Expected an i32 store");
8694
8695 RLI.Ptr = FIdx;
8696 RLI.Chain = Chain;
8697 RLI.MPI =
8698 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8699 RLI.Alignment = Align(4);
8700 }
8701
8702 MachineMemOperand *MMO =
8703 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8704 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8705 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8706 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8707 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8708 MVT::i32, MMO);
8709 Chain = Ld.getValue(1);
8710 if (ReusingLoad)
8711 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8712 } else {
8713 assert(Subtarget.isPPC64() &&
8714 "i32->FP without LFIWAX supported only on PPC64");
8715
8716 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8717 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8718
8719 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8720
8721 // STD the extended value into the stack slot.
8722 SDValue Store = DAG.getStore(
8723 Chain, dl, Ext64, FIdx,
8724 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8725 Chain = Store;
8726
8727 // Load the value as a double.
8728 Ld = DAG.getLoad(
8729 MVT::f64, dl, Chain, FIdx,
8730 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8731 Chain = Ld.getValue(1);
8732 }
8733
8734 // FCFID it and return it.
8735 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8736 if (IsStrict)
8737 Chain = FP.getValue(1);
8738 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8739 if (IsStrict)
8740 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8741 DAG.getVTList(MVT::f32, MVT::Other),
8742 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8743 else
8744 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8745 DAG.getIntPtrConstant(0, dl));
8746 }
8747 return FP;
8748}
8749
8750SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8751 SelectionDAG &DAG) const {
8752 SDLoc dl(Op);
8753 /*
8754 The rounding mode is in bits 30:31 of FPSCR, and has the following
8755 settings:
8756 00 Round to nearest
8757 01 Round to 0
8758 10 Round to +inf
8759 11 Round to -inf
8760
8761 FLT_ROUNDS, on the other hand, expects the following:
8762 -1 Undefined
8763 0 Round to 0
8764 1 Round to nearest
8765 2 Round to +inf
8766 3 Round to -inf
8767
8768 To perform the conversion, we do:
8769 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8770 */
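// Spot-checking the formula against all four FPSCR encodings:
// 0 -> 0^1 = 1 (nearest), 1 -> 1^1 = 0 (to zero), 2 -> 2^0 = 2 (+inf),
// 3 -> 3^0 = 3 (-inf), which matches the table above.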
8771
8772 MachineFunction &MF = DAG.getMachineFunction();
8773 EVT VT = Op.getValueType();
8774 EVT PtrVT = getPointerTy(MF.getDataLayout());
8775
8776 // Save FP Control Word to register
8777 SDValue Chain = Op.getOperand(0);
8778 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8779 Chain = MFFS.getValue(1);
8780
8781 SDValue CWD;
8782 if (isTypeLegal(MVT::i64)) {
8783 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8784 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8785 } else {
8786 // Save FP register to stack slot
8787 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8788 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8789 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8790
8791 // Load FP Control Word from low 32 bits of stack slot.
8793 "Stack slot adjustment is valid only on big endian subtargets!");
8794 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8795 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8796 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8797 Chain = CWD.getValue(1);
8798 }
8799
8800 // Transform as necessary
8801 SDValue CWD1 =
8802 DAG.getNode(ISD::AND, dl, MVT::i32,
8803 CWD, DAG.getConstant(3, dl, MVT::i32));
8804 SDValue CWD2 =
8805 DAG.getNode(ISD::SRL, dl, MVT::i32,
8806 DAG.getNode(ISD::AND, dl, MVT::i32,
8807 DAG.getNode(ISD::XOR, dl, MVT::i32,
8808 CWD, DAG.getConstant(3, dl, MVT::i32)),
8809 DAG.getConstant(3, dl, MVT::i32)),
8810 DAG.getConstant(1, dl, MVT::i32));
8811
8812 SDValue RetVal =
8813 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8814
8815 RetVal =
8816 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8817 dl, VT, RetVal);
8818
8819 return DAG.getMergeValues({RetVal, Chain}, dl);
8820}
8821
8822SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8823 EVT VT = Op.getValueType();
8824 unsigned BitWidth = VT.getSizeInBits();
8825 SDLoc dl(Op);
8826 assert(Op.getNumOperands() == 3 &&
8827 VT == Op.getOperand(1).getValueType() &&
8828 "Unexpected SHL!");
8829
8830 // Expand into a bunch of logical ops. Note that these ops
8831 // depend on the PPC behavior for oversized shift amounts.
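// For example, with 32-bit halves and Amt == 40: Tmp2 and Tmp3 below
// are oversized shifts and yield 0 on PPC, while Tmp5 == 8, so
// OutHi == Lo << 8 and OutLo == Lo << 40 == 0, exactly the two halves
// of (Hi:Lo) << 40.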
8832 SDValue Lo = Op.getOperand(0);
8833 SDValue Hi = Op.getOperand(1);
8834 SDValue Amt = Op.getOperand(2);
8835 EVT AmtVT = Amt.getValueType();
8836
8837 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8838 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8839 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8840 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8841 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8842 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8843 DAG.getConstant(-BitWidth, dl, AmtVT));
8844 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8845 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8846 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8847 SDValue OutOps[] = { OutLo, OutHi };
8848 return DAG.getMergeValues(OutOps, dl);
8849}
8850
8851SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8852 EVT VT = Op.getValueType();
8853 SDLoc dl(Op);
8854 unsigned BitWidth = VT.getSizeInBits();
8855 assert(Op.getNumOperands() == 3 &&
8856 VT == Op.getOperand(1).getValueType() &&
8857 "Unexpected SRL!");
8858
8859 // Expand into a bunch of logical ops. Note that these ops
8860 // depend on the PPC behavior for oversized shift amounts.
8861 SDValue Lo = Op.getOperand(0);
8862 SDValue Hi = Op.getOperand(1);
8863 SDValue Amt = Op.getOperand(2);
8864 EVT AmtVT = Amt.getValueType();
8865
8866 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8867 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8868 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8869 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8870 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8871 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8872 DAG.getConstant(-BitWidth, dl, AmtVT));
8873 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8874 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8875 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8876 SDValue OutOps[] = { OutLo, OutHi };
8877 return DAG.getMergeValues(OutOps, dl);
8878}
8879
8880SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8881 SDLoc dl(Op);
8882 EVT VT = Op.getValueType();
8883 unsigned BitWidth = VT.getSizeInBits();
8884 assert(Op.getNumOperands() == 3 &&
8885 VT == Op.getOperand(1).getValueType() &&
8886 "Unexpected SRA!");
8887
8888 // Expand into a bunch of logical ops, followed by a select_cc.
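// The final select_cc picks the OR of the two partial shifts when
// Amt - BitWidth <= 0, and Hi >> (Amt - BitWidth), which carries the
// sign fill, when Amt exceeds BitWidth.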
8889 SDValue Lo = Op.getOperand(0);
8890 SDValue Hi = Op.getOperand(1);
8891 SDValue Amt = Op.getOperand(2);
8892 EVT AmtVT = Amt.getValueType();
8893
8894 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8895 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8896 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8897 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8898 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8899 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8900 DAG.getConstant(-BitWidth, dl, AmtVT));
8901 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8902 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8903 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8904 Tmp4, Tmp6, ISD::SETLE);
8905 SDValue OutOps[] = { OutLo, OutHi };
8906 return DAG.getMergeValues(OutOps, dl);
8907}
8908
8909SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8910 SelectionDAG &DAG) const {
8911 SDLoc dl(Op);
8912 EVT VT = Op.getValueType();
8913 unsigned BitWidth = VT.getSizeInBits();
8914
8915 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8916 SDValue X = Op.getOperand(0);
8917 SDValue Y = Op.getOperand(1);
8918 SDValue Z = Op.getOperand(2);
8919 EVT AmtVT = Z.getValueType();
8920
8921 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8922 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8923 // This is simpler than TargetLowering::expandFunnelShift because we can rely
8924 // on PowerPC shift by BW being well defined.
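// For example, fshl with BitWidth == 32 and Z == 40: Z % 32 == 8, so
// the result is (X << 8) | (Y >> 24); when Z % 32 == 0, SubZ == 32 and
// the well-defined PPC shift by 32 yields 0, leaving just X.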
8925 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8926 DAG.getConstant(BitWidth - 1, dl, AmtVT));
8927 SDValue SubZ =
8928 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8929 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8930 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8931 return DAG.getNode(ISD::OR, dl, VT, X, Y);
8932}
8933
8934//===----------------------------------------------------------------------===//
8935// Vector related lowering.
8936//
8937
8938/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8939/// element size of SplatSize. Cast the result to VT.
8940static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8941 SelectionDAG &DAG, const SDLoc &dl) {
8942 static const MVT VTys[] = { // canonical VT to use for each size.
8943 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8944 };
8945
8946 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8947
8948 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8949 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8950 SplatSize = 1;
8951 Val = 0xFF;
8952 }
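// For example, a 2-byte splat of 0xFFFF becomes a 1-byte splat of 0xFF,
// so every all-ones splat shares the same vspltisb -1 canonical form.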
8953
8954 EVT CanonicalVT = VTys[SplatSize-1];
8955
8956 // Build a canonical splat for this value.
8957 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8958}
8959
8960/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8961/// specified intrinsic ID.
8962static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8963 const SDLoc &dl, EVT DestVT = MVT::Other) {
8964 if (DestVT == MVT::Other) DestVT = Op.getValueType();
8965 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8966 DAG.getConstant(IID, dl, MVT::i32), Op);
8967}
8968
8969/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8970/// specified intrinsic ID.
8971static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8972 SelectionDAG &DAG, const SDLoc &dl,
8973 EVT DestVT = MVT::Other) {
8974 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8975 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8976 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8977}
8978
8979/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8980/// specified intrinsic ID.
8981static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8982 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8983 EVT DestVT = MVT::Other) {
8984 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8985 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8986 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8987}
8988
8989/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8990/// amount. The result has the specified value type.
8991static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8992 SelectionDAG &DAG, const SDLoc &dl) {
8993 // Force LHS/RHS to be the right type.
8994 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8995 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8996
8997 int Ops[16];
8998 for (unsigned i = 0; i != 16; ++i)
8999 Ops[i] = i + Amt;
9000 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9001 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9002}
9003
9004/// Do we have an efficient pattern in a .td file for this node?
9005///
9006/// \param V - pointer to the BuildVectorSDNode being matched
9007/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9008///
9009/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9010/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9011/// the opposite is true (expansion is beneficial) are:
9012/// - The node builds a vector out of integers that are not 32 or 64-bits
9013/// - The node builds a vector out of constants
9014/// - The node is a "load-and-splat"
9015/// In all other cases, we will choose to keep the BUILD_VECTOR.
9016 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9017 bool HasDirectMove,
9018 bool HasP8Vector) {
9019 EVT VecVT = V->getValueType(0);
9020 bool RightType = VecVT == MVT::v2f64 ||
9021 (HasP8Vector && VecVT == MVT::v4f32) ||
9022 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9023 if (!RightType)
9024 return false;
9025
9026 bool IsSplat = true;
9027 bool IsLoad = false;
9028 SDValue Op0 = V->getOperand(0);
9029
9030 // This function is called in a block that confirms the node is not a constant
9031 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9032 // different constants.
9033 if (V->isConstant())
9034 return false;
9035 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9036 if (V->getOperand(i).isUndef())
9037 return false;
9038 // We want to expand nodes that represent load-and-splat even if the
9039 // loaded value is a floating point truncation or conversion to int.
9040 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9041 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9042 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9043 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9044 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9045 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9046 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9047 IsLoad = true;
9048 // If the operands are different or the input is not a load and has more
9049 // uses than just this BV node, then it isn't a splat.
9050 if (V->getOperand(i) != Op0 ||
9051 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9052 IsSplat = false;
9053 }
9054 return !(IsSplat && IsLoad);
9055}
9056
9057// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9058SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9059
9060 SDLoc dl(Op);
9061 SDValue Op0 = Op->getOperand(0);
9062
9063 if ((Op.getValueType() != MVT::f128) ||
9064 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9065 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9066 (Op0.getOperand(1).getValueType() != MVT::i64))
9067 return SDValue();
9068
9069 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9070 Op0.getOperand(1));
9071}
9072
9073static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9074 const SDValue *InputLoad = &Op;
9075 while (InputLoad->getOpcode() == ISD::BITCAST)
9076 InputLoad = &InputLoad->getOperand(0);
9077 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9078 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9079 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9080 InputLoad = &InputLoad->getOperand(0);
9081 }
9082 if (InputLoad->getOpcode() != ISD::LOAD)
9083 return nullptr;
9084 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9085 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9086}
9087
9088// Convert the argument APFloat to a single precision APFloat if there is no
9089// loss in information during the conversion to single precision APFloat and the
9090// resulting number is not a denormal number. Return true if successful.
9091 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9092 APFloat APFloatToConvert = ArgAPFloat;
9093 bool LosesInfo = true;
9094 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9095 &LosesInfo);
9096 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9097 if (Success)
9098 ArgAPFloat = APFloatToConvert;
9099 return Success;
9100}
9101
9102// Bitcast the argument APInt to a double and convert it to a single precision
9103// APFloat, bitcast the APFloat to an APInt and assign it to the original
9104// argument if there is no loss in information during the conversion from
9105// double to single precision APFloat and the resulting number is not a denormal
9106// number. Return true if successful.
9107 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9108 double DpValue = ArgAPInt.bitsToDouble();
9109 APFloat APFloatDp(DpValue);
9110 bool Success = convertToNonDenormSingle(APFloatDp);
9111 if (Success)
9112 ArgAPInt = APFloatDp.bitcastToAPInt();
9113 return Success;
9114}
9115
9116 // Nondestructive check for convertToNonDenormSingle.
9117 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9118 // Only convert if it loses info, since XXSPLTIDP should
9119 // handle the other case.
9120 APFloat APFloatToConvert = ArgAPFloat;
9121 bool LosesInfo = true;
9122 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9123 &LosesInfo);
9124
9125 return (!LosesInfo && !APFloatToConvert.isDenormal());
9126}
9127
9128static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9129 unsigned &Opcode) {
9130 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9131 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9132 return false;
9133
9134 EVT Ty = Op->getValueType(0);
9135 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9136 // as we cannot handle extending loads for these types.
9137 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9138 ISD::isNON_EXTLoad(InputNode))
9139 return true;
9140
9141 EVT MemVT = InputNode->getMemoryVT();
9142 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9143 // memory VT is the same vector element VT type.
9144 // The loads feeding into the v8i16 and v16i8 types will be extending because
9145 // scalar i8/i16 are not legal types.
9146 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9147 (MemVT == Ty.getVectorElementType()))
9148 return true;
9149
9150 if (Ty == MVT::v2i64) {
9151 // Check the extend type, when the input type is i32, and the output vector
9152 // type is v2i64.
9153 if (MemVT == MVT::i32) {
9154 if (ISD::isZEXTLoad(InputNode))
9155 Opcode = PPCISD::ZEXT_LD_SPLAT;
9156 if (ISD::isSEXTLoad(InputNode))
9157 Opcode = PPCISD::SEXT_LD_SPLAT;
9158 }
9159 return true;
9160 }
9161 return false;
9162}
9163
9164// If this is a case we can't handle, return null and let the default
9165// expansion code take care of it. If we CAN select this case, and if it
9166// selects to a single instruction, return Op. Otherwise, if we can codegen
9167// this case more efficiently than a constant pool load, lower it to the
9168// sequence of ops that should be used.
9169SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9170 SelectionDAG &DAG) const {
9171 SDLoc dl(Op);
9172 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9173 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9174
9175 // Check if this is a splat of a constant value.
9176 APInt APSplatBits, APSplatUndef;
9177 unsigned SplatBitSize;
9178 bool HasAnyUndefs;
9179 bool BVNIsConstantSplat =
9180 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9181 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9182
9183 // If it is a splat of a double, check if we can shrink it to a 32 bit
9184 // non-denormal float which when converted back to double gives us the same
9185 // double. This is to exploit the XXSPLTIDP instruction.
9186 // If we lose precision, we use XXSPLTI32DX.
9187 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9188 Subtarget.hasPrefixInstrs()) {
9189 // Check the type first to short-circuit so we don't modify APSplatBits if
9190 // this block isn't executed.
9191 if ((Op->getValueType(0) == MVT::v2f64) &&
9192 convertToNonDenormSingle(APSplatBits)) {
9193 SDValue SplatNode = DAG.getNode(
9194 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9195 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9196 return DAG.getBitcast(Op.getValueType(), SplatNode);
9197 } else {
9198 // We may lose precision, so we have to use XXSPLTI32DX.
9199
9200 uint32_t Hi =
9201 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9202 uint32_t Lo =
9203 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9204 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9205
9206 if (!Hi || !Lo)
9207 // If either word is 0, then we should generate XXLXOR to set it to 0.
9208 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9209
9210 if (Hi)
9211 SplatNode = DAG.getNode(
9212 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9213 DAG.getTargetConstant(0, dl, MVT::i32),
9214 DAG.getTargetConstant(Hi, dl, MVT::i32));
9215
9216 if (Lo)
9217 SplatNode =
9218 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9219 DAG.getTargetConstant(1, dl, MVT::i32),
9220 DAG.getTargetConstant(Lo, dl, MVT::i32));
9221
9222 return DAG.getBitcast(Op.getValueType(), SplatNode);
9223 }
9224 }
9225
9226 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9227 unsigned NewOpcode = PPCISD::LD_SPLAT;
9228
9229 // Handle load-and-splat patterns as we have instructions that will do this
9230 // in one go.
9231 if (DAG.isSplatValue(Op, true) &&
9232 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9233 const SDValue *InputLoad = &Op.getOperand(0);
9234 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9235
9236 // If the input load is an extending load, it will be an i32 -> i64
9237 // extending load and isValidSplatLoad() will update NewOpcode.
9238 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9239 unsigned ElementSize =
9240 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9241
9242 assert(((ElementSize == 2 * MemorySize)
9243 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9244 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9245 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9246 "Unmatched element size and opcode!\n");
9247
9248 // To check for a single use of this load, we have to check for vector
9249 // width (128 bits) / ElementSize uses (since each operand of the
9250 // BUILD_VECTOR is a separate use of the value).
9251 unsigned NumUsesOfInputLD = 128 / ElementSize;
9252 for (SDValue BVInOp : Op->ops())
9253 if (BVInOp.isUndef())
9254 NumUsesOfInputLD--;
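// For example, an i32 -> i64 extending splat load into v2i64 has
// ElementSize 64, so a BUILD_VECTOR with no undef operands must show
// 128 / 64 = 2 uses of the loaded value.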
9255
9256 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9257 // the cases below would also arise for "lfiwzx/lfiwax + LE target + index
9258 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9259 // 15", but function isValidSplatLoad() currently only returns true when
9260 // the splatted value is the one at index 0, so we will not run into
9261 // trouble for these cases.
9262 //
9263 // case 1 - lfiwzx/lfiwax
9264 // 1.1: load result is i32 and is sign/zero extend to i64;
9265 // 1.2: build a v2i64 vector type with above loaded value;
9266 // 1.3: the vector has only one value at index 0, others are all undef;
9267 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9268 if (NumUsesOfInputLD == 1 &&
9269 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9270 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9271 Subtarget.hasLFIWAX()))
9272 return SDValue();
9273
9274 // case 2 - lxvr[hb]x
9275 // 2.1: load result is at most i16;
9276 // 2.2: build a vector with above loaded value;
9277 // 2.3: the vector has only one value at index 0, others are all undef;
9278 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9279 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9280 Subtarget.isISA3_1() && ElementSize <= 16)
9281 return SDValue();
9282
9283 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9284 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9285 Subtarget.hasVSX()) {
9286 SDValue Ops[] = {
9287 LD->getChain(), // Chain
9288 LD->getBasePtr(), // Ptr
9289 DAG.getValueType(Op.getValueType()) // VT
9290 };
9291 SDValue LdSplt = DAG.getMemIntrinsicNode(
9292 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9293 LD->getMemoryVT(), LD->getMemOperand());
9294 // Replace all uses of the output chain of the original load with the
9295 // output chain of the new load.
9296 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9297 LdSplt.getValue(1));
9298 return LdSplt;
9299 }
9300 }
9301
9302 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9303 // 32-bits can be lowered to VSX instructions under certain conditions.
9304 // Without VSX, there is no pattern more efficient than expanding the node.
9305 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9306 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9307 Subtarget.hasP8Vector()))
9308 return Op;
9309 return SDValue();
9310 }
9311
9312 uint64_t SplatBits = APSplatBits.getZExtValue();
9313 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9314 unsigned SplatSize = SplatBitSize / 8;
9315
9316 // First, handle single instruction cases.
9317
9318 // All zeros?
9319 if (SplatBits == 0) {
9320 // Canonicalize all zero vectors to be v4i32.
9321 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9322 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9323 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9324 }
9325 return Op;
9326 }
9327
9328 // We have XXSPLTIW for constant splats four bytes wide.
9329 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9330 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9331 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9332 // turned into a 4-byte splat of 0xABABABAB.
9333 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9334 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9335 Op.getValueType(), DAG, dl);
9336
9337 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9338 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9339 dl);
9340
9341 // We have XXSPLTIB for constant splats one byte wide.
9342 if (Subtarget.hasP9Vector() && SplatSize == 1)
9343 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9344 dl);
9345
9346 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9347 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9348 (32-SplatBitSize));
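// For example, a 1-byte splat of 0xF0: 0xF0 << 24 is 0xF0000000, and
// the arithmetic shift right by 24 yields SextVal == -16, in range.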
9349 if (SextVal >= -16 && SextVal <= 15)
9350 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9351 dl);
9352
9353 // Two instruction sequences.
9354
9355 // If this value is in the range [-32,30] and is even, use:
9356 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9357 // If this value is in the range [17,31] and is odd, use:
9358 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9359 // If this value is in the range [-31,-17] and is odd, use:
9360 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9361 // Note the last two are three-instruction sequences.
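// For example, a splat of 30 is built as vsplti(15) added to itself,
// and a splat of 27 as vsplti(11) - vsplti(-16).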
9362 if (SextVal >= -32 && SextVal <= 31) {
9363 // To avoid having these optimizations undone by constant folding,
9364 // we convert to a pseudo that will be expanded later into one of
9365 // the above forms.
9366 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9367 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9368 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9369 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9370 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9371 if (VT == Op.getValueType())
9372 return RetVal;
9373 else
9374 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9375 }
9376
9377 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9378 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9379 // for fneg/fabs.
9380 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9381 // Make -1 and vspltisw -1:
9382 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9383
9384 // Make the VSLW intrinsic, computing 0x8000_0000.
9385 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9386 OnesV, DAG, dl);
9387
9388 // xor by OnesV to invert it.
9389 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9390 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9391 }
9392
9393 // Check to see if this is a wide variety of vsplti*, binop self cases.
9394 static const signed char SplatCsts[] = {
9395 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9396 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9397 };
9398
9399 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9400 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9401 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9402 int i = SplatCsts[idx];
9403
9404 // Figure out what shift amount will be used by altivec if shifted by i in
9405 // this splat size.
9406 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9407
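// For example, a byte splat of 64 matches i == 4 below ((4 << 4) == 64)
// and is emitted as vspltisb 4 followed by a vslb of the result by
// itself.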
9408 // vsplti + shl self.
9409 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9410 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9411 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9412 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9413 Intrinsic::ppc_altivec_vslw
9414 };
9415 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9416 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9417 }
9418
9419 // vsplti + srl self.
9420 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9421 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9422 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9423 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9424 Intrinsic::ppc_altivec_vsrw
9425 };
9426 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9427 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9428 }
9429
9430 // vsplti + rol self.
9431 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9432 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9433 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9434 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9435 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9436 Intrinsic::ppc_altivec_vrlw
9437 };
9438 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9439 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9440 }
9441
9442 // t = vsplti c, result = vsldoi t, t, 1
9443 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9444 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9445 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9446 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9447 }
9448 // t = vsplti c, result = vsldoi t, t, 2
9449 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9450 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9451 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9452 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9453 }
9454 // t = vsplti c, result = vsldoi t, t, 3
9455 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9456 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9457 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9458 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9459 }
9460 }
9461
9462 return SDValue();
9463}
9464
9465/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9466/// the specified operations to build the shuffle.
9467static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9468 SDValue RHS, SelectionDAG &DAG,
9469 const SDLoc &dl) {
9470 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9471 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9472 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
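// Each 13-bit ID packs four shuffle elements as base-9 digits (8 means
// undef), so LHSID (1*9+2)*9+3 decodes to <0,1,2,3>, a plain copy of
// LHS, and ((4*9+5)*9+6)*9+7 to <4,5,6,7>, a copy of RHS.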
9473
9474 enum {
9475 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9476 OP_VMRGHW,
9477 OP_VMRGLW,
9478 OP_VSPLTISW0,
9479 OP_VSPLTISW1,
9480 OP_VSPLTISW2,
9481 OP_VSPLTISW3,
9482 OP_VSLDOI4,
9483 OP_VSLDOI8,
9484 OP_VSLDOI12
9485 };
9486
9487 if (OpNum == OP_COPY) {
9488 if (LHSID == (1*9+2)*9+3) return LHS;
9489 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9490 return RHS;
9491 }
9492
9493 SDValue OpLHS, OpRHS;
9494 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9495 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9496
9497 int ShufIdxs[16];
9498 switch (OpNum) {
9499 default: llvm_unreachable("Unknown i32 permute!");
9500 case OP_VMRGHW:
9501 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9502 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9503 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9504 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9505 break;
9506 case OP_VMRGLW:
9507 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9508 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9509 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9510 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9511 break;
9512 case OP_VSPLTISW0:
9513 for (unsigned i = 0; i != 16; ++i)
9514 ShufIdxs[i] = (i&3)+0;
9515 break;
9516 case OP_VSPLTISW1:
9517 for (unsigned i = 0; i != 16; ++i)
9518 ShufIdxs[i] = (i&3)+4;
9519 break;
9520 case OP_VSPLTISW2:
9521 for (unsigned i = 0; i != 16; ++i)
9522 ShufIdxs[i] = (i&3)+8;
9523 break;
9524 case OP_VSPLTISW3:
9525 for (unsigned i = 0; i != 16; ++i)
9526 ShufIdxs[i] = (i&3)+12;
9527 break;
9528 case OP_VSLDOI4:
9529 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9530 case OP_VSLDOI8:
9531 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9532 case OP_VSLDOI12:
9533 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9534 }
9535 EVT VT = OpLHS.getValueType();
9536 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9537 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9538 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9539 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9540}
9541
9542/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9543/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9544/// SDValue.
9545SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9546 SelectionDAG &DAG) const {
9547 const unsigned BytesInVector = 16;
9548 bool IsLE = Subtarget.isLittleEndian();
9549 SDLoc dl(N);
9550 SDValue V1 = N->getOperand(0);
9551 SDValue V2 = N->getOperand(1);
9552 unsigned ShiftElts = 0, InsertAtByte = 0;
9553 bool Swap = false;
9554
9555 // Shifts required to get the byte we want at element 7.
9556 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9557 0, 15, 14, 13, 12, 11, 10, 9};
9558 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9559 1, 2, 3, 4, 5, 6, 7, 8};
9560
9561 ArrayRef<int> Mask = N->getMask();
9562 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9563
9564 // For each mask element, find out if we're just inserting something
9565 // from V2 into V1 or vice versa.
9566 // Possible permutations inserting an element from V2 into V1:
9567 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9568 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9569 // ...
9570 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9571 // Inserting from V1 into V2 will be similar, except mask range will be
9572 // [16,31].
9573
9574 bool FoundCandidate = false;
9575 // If both vector operands for the shuffle are the same vector, the mask
9576 // will contain only elements from the first one and the second one will be
9577 // undef.
9578 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9579 // Go through the mask of bytes to find an element that's being moved
9580 // from one vector to the other.
9581 for (unsigned i = 0; i < BytesInVector; ++i) {
9582 unsigned CurrentElement = Mask[i];
9583 // If 2nd operand is undefined, we should only look for element 7 in the
9584 // Mask.
9585 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9586 continue;
9587
9588 bool OtherElementsInOrder = true;
9589 // Examine the other elements in the Mask to see if they're in original
9590 // order.
9591 for (unsigned j = 0; j < BytesInVector; ++j) {
9592 if (j == i)
9593 continue;
9594 // If CurrentElement is from V1 [0,15], we expect the rest of the Mask to
9595 // be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
9596 // in which case we assume we're always picking from the 1st operand.
9597 int MaskOffset =
9598 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9599 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9600 OtherElementsInOrder = false;
9601 break;
9602 }
9603 }
9604 // If other elements are in original order, we record the number of shifts
9605 // we need to get the element we want into element 7. Also record which byte
9606 // in the vector we should insert into.
9607 if (OtherElementsInOrder) {
9608 // If 2nd operand is undefined, we assume no shifts and no swapping.
9609 if (V2.isUndef()) {
9610 ShiftElts = 0;
9611 Swap = false;
9612 } else {
9613 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9614 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9615 : BigEndianShifts[CurrentElement & 0xF];
9616 Swap = CurrentElement < BytesInVector;
9617 }
9618 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9619 FoundCandidate = true;
9620 break;
9621 }
9622 }
9623
9624 if (!FoundCandidate)
9625 return SDValue();
9626
9627 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9628 // optionally with VECSHL if shift is required.
9629 if (Swap)
9630 std::swap(V1, V2);
9631 if (V2.isUndef())
9632 V2 = V1;
9633 if (ShiftElts) {
9634 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9635 DAG.getConstant(ShiftElts, dl, MVT::i32));
9636 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9637 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9638 }
9639 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9640 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9641}
9642
9643/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9644/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9645/// SDValue.
9646SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9647 SelectionDAG &DAG) const {
9648 const unsigned NumHalfWords = 8;
9649 const unsigned BytesInVector = NumHalfWords * 2;
9650 // Check that the shuffle is on half-words.
9651 if (!isNByteElemShuffleMask(N, 2, 1))
9652 return SDValue();
9653
9654 bool IsLE = Subtarget.isLittleEndian();
9655 SDLoc dl(N);
9656 SDValue V1 = N->getOperand(0);
9657 SDValue V2 = N->getOperand(1);
9658 unsigned ShiftElts = 0, InsertAtByte = 0;
9659 bool Swap = false;
9660
9661 // Shifts required to get the half-word we want at element 3.
9662 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9663 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9664
9665 uint32_t Mask = 0;
9666 uint32_t OriginalOrderLow = 0x1234567;
9667 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9668 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9669 // 32-bit space, only need 4-bit nibbles per element.
9670 for (unsigned i = 0; i < NumHalfWords; ++i) {
9671 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9672 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9673 }
9674
9675 // For each mask element, find out if we're just inserting something
9676 // from V2 into V1 or vice versa. Possible permutations inserting an element
9677 // from V2 into V1:
9678 // X, 1, 2, 3, 4, 5, 6, 7
9679 // 0, X, 2, 3, 4, 5, 6, 7
9680 // 0, 1, X, 3, 4, 5, 6, 7
9681 // 0, 1, 2, X, 4, 5, 6, 7
9682 // 0, 1, 2, 3, X, 5, 6, 7
9683 // 0, 1, 2, 3, 4, X, 6, 7
9684 // 0, 1, 2, 3, 4, 5, X, 7
9685 // 0, 1, 2, 3, 4, 5, 6, X
9686 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9687
9688 bool FoundCandidate = false;
9689 // Go through the mask of half-words to find an element that's being moved
9690 // from one vector to the other.
9691 for (unsigned i = 0; i < NumHalfWords; ++i) {
9692 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9693 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9694 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9695 uint32_t TargetOrder = 0x0;
9696
9697 // If both vector operands for the shuffle are the same vector, the mask
9698 // will contain only elements from the first one and the second one will be
9699 // undef.
9700 if (V2.isUndef()) {
9701 ShiftElts = 0;
9702 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9703 TargetOrder = OriginalOrderLow;
9704 Swap = false;
9705      // Skip if this is not the correct element, or if the mask of the other
9706      // elements doesn't match our expected order.
9707 if (MaskOneElt == VINSERTHSrcElem &&
9708 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9709 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9710 FoundCandidate = true;
9711 break;
9712 }
9713 } else { // If both operands are defined.
9714 // Target order is [8,15] if the current mask is between [0,7].
9715 TargetOrder =
9716 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9717      // Skip if the mask of the other elements doesn't match our expected order.
9718 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9719 // We only need the last 3 bits for the number of shifts.
9720 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9721 : BigEndianShifts[MaskOneElt & 0x7];
9722 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9723 Swap = MaskOneElt < NumHalfWords;
9724 FoundCandidate = true;
9725 break;
9726 }
9727 }
9728 }
9729
9730 if (!FoundCandidate)
9731 return SDValue();
9732
9733 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9734 // optionally with VECSHL if shift is required.
9735 if (Swap)
9736 std::swap(V1, V2);
9737 if (V2.isUndef())
9738 V2 = V1;
9739 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9740 if (ShiftElts) {
9741 // Double ShiftElts because we're left shifting on v16i8 type.
9742 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9743 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9744 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9745 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9746 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9747 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9748 }
9749 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9750 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9751 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9752 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9753}
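// Worked example (a sketch, assuming a little-endian subtarget): the v8i16
// mask <0,1,2,9,4,5,6,7> packs into Mask = 0x1294567. At i = 3, MaskOneElt
// = 9 (halfword 1 of V2) while the remaining nibbles match OriginalOrderLow
// (0x1234567), so ShiftElts = LittleEndianShifts[9 & 0x7] = 3, Swap = false,
// and InsertAtByte = 16 - (3 + 1) * 2 = 8; V2 is shifted by 2 * 3 = 6 bytes
// via VECSHL and inserted at byte 8 of V1.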
9754
9755/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9756/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9757/// return the default SDValue.
9758SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9759 SelectionDAG &DAG) const {
9760 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9761 // to v16i8. Peek through the bitcasts to get the actual operands.
9762 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9763 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9764
9765 auto ShuffleMask = SVN->getMask();
9766 SDValue VecShuffle(SVN, 0);
9767 SDLoc DL(SVN);
9768
9769 // Check that we have a four byte shuffle.
9770 if (!isNByteElemShuffleMask(SVN, 4, 1))
9771 return SDValue();
9772
9773  // Canonicalize so that the RHS is a BUILD_VECTOR when lowering to xxsplti32dx.
9774 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9775 std::swap(LHS, RHS);
9776    VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9777    ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9778 }
9779
9780 // Ensure that the RHS is a vector of constants.
9781 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9782 if (!BVN)
9783 return SDValue();
9784
9785 // Check if RHS is a splat of 4-bytes (or smaller).
9786 APInt APSplatValue, APSplatUndef;
9787 unsigned SplatBitSize;
9788 bool HasAnyUndefs;
9789 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9790 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9791 SplatBitSize > 32)
9792 return SDValue();
9793
9794 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9795 // The instruction splats a constant C into two words of the source vector
9796 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9797 // Thus we check that the shuffle mask is the equivalent of
9798 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9799 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9800 // within each word are consecutive, so we only need to check the first byte.
9801 SDValue Index;
9802 bool IsLE = Subtarget.isLittleEndian();
9803 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9804 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9805 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9806 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9807 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9808 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9809 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9810 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9811 else
9812 return SDValue();
9813
9814 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9815 // for XXSPLTI32DX.
9816 unsigned SplatVal = APSplatValue.getZExtValue();
9817 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9818 SplatVal |= (SplatVal << SplatBitSize);
9819
9820 SDValue SplatNode = DAG.getNode(
9821      PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9822      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9823 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9824}
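// For example, an 8-bit splat of 0xAB is widened by the loop above in two
// doubling steps: 0xAB | (0xAB << 8) = 0xABAB, then 0xABAB | (0xABAB << 16)
// = 0xABABABAB, which is the 32-bit immediate XXSPLTI32DX requires.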
9825
9826/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9827/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9828/// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
9829/// i.e. (or (shl x, C1), (srl x, 128-C1)).
9830SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9831 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9832 assert(Op.getValueType() == MVT::v1i128 &&
9833 "Only set v1i128 as custom, other type shouldn't reach here!");
9834 SDLoc dl(Op);
9835 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9836 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9837 unsigned SHLAmt = N1.getConstantOperandVal(0);
9838 if (SHLAmt % 8 == 0) {
9839 std::array<int, 16> Mask;
9840 std::iota(Mask.begin(), Mask.end(), 0);
9841 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9842 if (SDValue Shuffle =
9843            DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9844                                 DAG.getUNDEF(MVT::v16i8), Mask))
9845 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9846 }
9847 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9848 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9849 DAG.getConstant(SHLAmt, dl, MVT::i32));
9850 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9851 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9852 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9853 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9854}
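// For example, ROTL(v1i128 x, 16) satisfies SHLAmt % 8 == 0 and becomes a
// v16i8 shuffle with mask <2,3,...,15,0,1>, whereas ROTL(v1i128 x, 12) does
// not and is expanded to (or (shl x, 12), (srl x, 116)) on i128.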
9855
9856/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9857/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9858/// return the code it can be lowered into. Worst case, it can always be
9859/// lowered into a vperm.
9860SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9861 SelectionDAG &DAG) const {
9862 SDLoc dl(Op);
9863 SDValue V1 = Op.getOperand(0);
9864 SDValue V2 = Op.getOperand(1);
9865  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9866
9867 // Any nodes that were combined in the target-independent combiner prior
9868 // to vector legalization will not be sent to the target combine. Try to
9869 // combine it here.
9870 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9871 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9872 return NewShuffle;
9873 Op = NewShuffle;
9874 SVOp = cast<ShuffleVectorSDNode>(Op);
9875 V1 = Op.getOperand(0);
9876 V2 = Op.getOperand(1);
9877 }
9878 EVT VT = Op.getValueType();
9879 bool isLittleEndian = Subtarget.isLittleEndian();
9880
9881 unsigned ShiftElts, InsertAtByte;
9882 bool Swap = false;
9883
9884 // If this is a load-and-splat, we can do that with a single instruction
9885 // in some cases. However if the load has multiple uses, we don't want to
9886 // combine it because that will just produce multiple loads.
9887 bool IsPermutedLoad = false;
9888 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9889 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9890 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9891 InputLoad->hasOneUse()) {
9892 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9893 int SplatIdx =
9894 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9895
9896 // The splat index for permuted loads will be in the left half of the vector
9897 // which is strictly wider than the loaded value by 8 bytes. So we need to
9898 // adjust the splat index to point to the correct address in memory.
9899 if (IsPermutedLoad) {
9900 assert((isLittleEndian || IsFourByte) &&
9901 "Unexpected size for permuted load on big endian target");
9902 SplatIdx += IsFourByte ? 2 : 1;
9903 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9904 "Splat of a value outside of the loaded memory");
9905 }
9906
9907 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9908 // For 4-byte load-and-splat, we need Power9.
9909 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9910 uint64_t Offset = 0;
9911 if (IsFourByte)
9912 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9913 else
9914 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9915
9916 // If the width of the load is the same as the width of the splat,
9917 // loading with an offset would load the wrong memory.
9918 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
9919 Offset = 0;
9920
9921 SDValue BasePtr = LD->getBasePtr();
9922 if (Offset != 0)
9923        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9924                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
9925 SDValue Ops[] = {
9926 LD->getChain(), // Chain
9927 BasePtr, // BasePtr
9928 DAG.getValueType(Op.getValueType()) // VT
9929 };
9930 SDVTList VTL =
9931 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9932      SDValue LdSplt =
9933          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9934                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
9935 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9936 if (LdSplt.getValueType() != SVOp->getValueType(0))
9937 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9938 return LdSplt;
9939 }
9940 }
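  // For example, splatting word element 1 of a little-endian v4i32 load
  // gives Offset = (3 - 1) * 4 = 8, so the LD_SPLAT node reads the scalar
  // directly from BasePtr + 8 rather than loading the whole vector and
  // permuting it.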
9941
9942 // All v2i64 and v2f64 shuffles are legal
9943 if (VT == MVT::v2i64 || VT == MVT::v2f64)
9944 return Op;
9945
9946 if (Subtarget.hasP9Vector() &&
9947 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9948 isLittleEndian)) {
9949 if (Swap)
9950 std::swap(V1, V2);
9951 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9952 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9953 if (ShiftElts) {
9954 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9955 DAG.getConstant(ShiftElts, dl, MVT::i32));
9956 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9957 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9958 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9959 }
9960 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9961 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9962 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9963 }
9964
9965 if (Subtarget.hasPrefixInstrs()) {
9966 SDValue SplatInsertNode;
9967 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9968 return SplatInsertNode;
9969 }
9970
9971 if (Subtarget.hasP9Altivec()) {
9972 SDValue NewISDNode;
9973 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9974 return NewISDNode;
9975
9976 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9977 return NewISDNode;
9978 }
9979
9980 if (Subtarget.hasVSX() &&
9981 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9982 if (Swap)
9983 std::swap(V1, V2);
9984 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9985 SDValue Conv2 =
9986 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9987
9988 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9989 DAG.getConstant(ShiftElts, dl, MVT::i32));
9990 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9991 }
9992
9993 if (Subtarget.hasVSX() &&
9994 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9995 if (Swap)
9996 std::swap(V1, V2);
9997 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9998 SDValue Conv2 =
9999 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10000
10001 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10002 DAG.getConstant(ShiftElts, dl, MVT::i32));
10003 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10004 }
10005
10006 if (Subtarget.hasP9Vector()) {
10007 if (PPC::isXXBRHShuffleMask(SVOp)) {
10008 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10009 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10010 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10011 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10012 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10013 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10014 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10015 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10016 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10017 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10018 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10019 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10020 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10021 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10022 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10023 }
10024 }
10025
10026 if (Subtarget.hasVSX()) {
10027 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10028 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10029
10030 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10031 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10032 DAG.getConstant(SplatIdx, dl, MVT::i32));
10033 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10034 }
10035
10036 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10037 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10038 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10039 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10040 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10041 }
10042 }
10043
10044 // Cases that are handled by instructions that take permute immediates
10045 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10046 // selected by the instruction selector.
10047 if (V2.isUndef()) {
10048 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10049 PPC::isSplatShuffleMask(SVOp, 2) ||
10050 PPC::isSplatShuffleMask(SVOp, 4) ||
10051 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10052 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10053 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10054 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10055 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10056 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10057 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10058 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10059 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10060 (Subtarget.hasP8Altivec() && (
10061 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10062 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10063 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10064 return Op;
10065 }
10066 }
10067
10068 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10069 // and produce a fixed permutation. If any of these match, do not lower to
10070 // VPERM.
10071 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10072 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10073 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10074 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10075 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10076 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10077 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10078 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10079 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10080 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10081 (Subtarget.hasP8Altivec() && (
10082 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10083 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10084 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10085 return Op;
10086
10087 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10088 // perfect shuffle table to emit an optimal matching sequence.
10089 ArrayRef<int> PermMask = SVOp->getMask();
10090
10091 if (!DisablePerfectShuffle && !isLittleEndian) {
10092 unsigned PFIndexes[4];
10093 bool isFourElementShuffle = true;
10094 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10095 ++i) { // Element number
10096 unsigned EltNo = 8; // Start out undef.
10097 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10098 if (PermMask[i * 4 + j] < 0)
10099 continue; // Undef, ignore it.
10100
10101 unsigned ByteSource = PermMask[i * 4 + j];
10102 if ((ByteSource & 3) != j) {
10103 isFourElementShuffle = false;
10104 break;
10105 }
10106
10107 if (EltNo == 8) {
10108 EltNo = ByteSource / 4;
10109 } else if (EltNo != ByteSource / 4) {
10110 isFourElementShuffle = false;
10111 break;
10112 }
10113 }
10114 PFIndexes[i] = EltNo;
10115 }
10116
10117 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10118 // perfect shuffle vector to determine if it is cost effective to do this as
10119 // discrete instructions, or whether we should use a vperm.
10120 // For now, we skip this for little endian until such time as we have a
10121 // little-endian perfect shuffle table.
10122 if (isFourElementShuffle) {
10123 // Compute the index in the perfect shuffle table.
10124 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10125 PFIndexes[2] * 9 + PFIndexes[3];
10126
10127 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10128 unsigned Cost = (PFEntry >> 30);
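      // For example, PFIndexes <1,0,3,2> (a word swap within each half)
      // gives PFTableIndex = 1*729 + 0*81 + 3*9 + 2 = 758; the top two bits
      // of PerfectShuffleTable[758] hold the operation count compared
      // against 3 below.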
10129
10130 // Determining when to avoid vperm is tricky. Many things affect the cost
10131 // of vperm, particularly how many times the perm mask needs to be
10132 // computed. For example, if the perm mask can be hoisted out of a loop or
10133 // is already used (perhaps because there are multiple permutes with the
10134 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10135 // permute mask out of the loop requires an extra register.
10136 //
10137 // As a compromise, we only emit discrete instructions if the shuffle can
10138 // be generated in 3 or fewer operations. When we have loop information
10139 // available, if this block is within a loop, we should avoid using vperm
10140 // for 3-operation perms and use a constant pool load instead.
10141 if (Cost < 3)
10142 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10143 }
10144 }
10145
10146 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10147 // vector that will get spilled to the constant pool.
10148 if (V2.isUndef()) V2 = V1;
10149
10150 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10151 // that it is in input element units, not in bytes. Convert now.
10152
10153 // For little endian, the order of the input vectors is reversed, and
10154 // the permutation mask is complemented with respect to 31. This is
10155 // necessary to produce proper semantics with the big-endian-biased vperm
10156 // instruction.
10157 EVT EltVT = V1.getValueType().getVectorElementType();
10158 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10159
10160 SmallVector<SDValue, 16> ResultMask;
10161 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10162 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10163
10164 for (unsigned j = 0; j != BytesPerElement; ++j)
10165 if (isLittleEndian)
10166 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10167 dl, MVT::i32));
10168 else
10169 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10170 MVT::i32));
10171 }
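  // For example, on little endian a v4i32 shuffle whose first element takes
  // input element 5 (SrcElt = 5, BytesPerElement = 4) emits control bytes
  // 31-20 .. 31-23 = <11,10,9,8>; together with the swapped (V2, V1) operand
  // order below, this recovers the intended semantics under vperm's
  // big-endian numbering.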
10172
10173 ShufflesHandledWithVPERM++;
10174 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10175 LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10176 LLVM_DEBUG(SVOp->dump());
10177 LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10178 LLVM_DEBUG(VPermMask.dump());
10179
10180 if (isLittleEndian)
10181 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10182 V2, V1, VPermMask);
10183 else
10184 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10185 V1, V2, VPermMask);
10186}
10187
10188/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10189/// vector comparison. If it is, return true and fill in Opc/isDot with
10190/// information about the intrinsic.
10191static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10192 bool &isDot, const PPCSubtarget &Subtarget) {
10193 unsigned IntrinsicID =
10194 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10195 CompareOpc = -1;
10196 isDot = false;
10197 switch (IntrinsicID) {
10198 default:
10199 return false;
10200 // Comparison predicates.
10201 case Intrinsic::ppc_altivec_vcmpbfp_p:
10202 CompareOpc = 966;
10203 isDot = true;
10204 break;
10205 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10206 CompareOpc = 198;
10207 isDot = true;
10208 break;
10209 case Intrinsic::ppc_altivec_vcmpequb_p:
10210 CompareOpc = 6;
10211 isDot = true;
10212 break;
10213 case Intrinsic::ppc_altivec_vcmpequh_p:
10214 CompareOpc = 70;
10215 isDot = true;
10216 break;
10217 case Intrinsic::ppc_altivec_vcmpequw_p:
10218 CompareOpc = 134;
10219 isDot = true;
10220 break;
10221 case Intrinsic::ppc_altivec_vcmpequd_p:
10222 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10223 CompareOpc = 199;
10224 isDot = true;
10225 } else
10226 return false;
10227 break;
10228 case Intrinsic::ppc_altivec_vcmpneb_p:
10229 case Intrinsic::ppc_altivec_vcmpneh_p:
10230 case Intrinsic::ppc_altivec_vcmpnew_p:
10231 case Intrinsic::ppc_altivec_vcmpnezb_p:
10232 case Intrinsic::ppc_altivec_vcmpnezh_p:
10233 case Intrinsic::ppc_altivec_vcmpnezw_p:
10234 if (Subtarget.hasP9Altivec()) {
10235 switch (IntrinsicID) {
10236 default:
10237 llvm_unreachable("Unknown comparison intrinsic.");
10238 case Intrinsic::ppc_altivec_vcmpneb_p:
10239 CompareOpc = 7;
10240 break;
10241 case Intrinsic::ppc_altivec_vcmpneh_p:
10242 CompareOpc = 71;
10243 break;
10244 case Intrinsic::ppc_altivec_vcmpnew_p:
10245 CompareOpc = 135;
10246 break;
10247 case Intrinsic::ppc_altivec_vcmpnezb_p:
10248 CompareOpc = 263;
10249 break;
10250 case Intrinsic::ppc_altivec_vcmpnezh_p:
10251 CompareOpc = 327;
10252 break;
10253 case Intrinsic::ppc_altivec_vcmpnezw_p:
10254 CompareOpc = 391;
10255 break;
10256 }
10257 isDot = true;
10258 } else
10259 return false;
10260 break;
10261 case Intrinsic::ppc_altivec_vcmpgefp_p:
10262 CompareOpc = 454;
10263 isDot = true;
10264 break;
10265 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10266 CompareOpc = 710;
10267 isDot = true;
10268 break;
10269 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10270 CompareOpc = 774;
10271 isDot = true;
10272 break;
10273 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10274 CompareOpc = 838;
10275 isDot = true;
10276 break;
10277 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10278 CompareOpc = 902;
10279 isDot = true;
10280 break;
10281 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10282 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10283 CompareOpc = 967;
10284 isDot = true;
10285 } else
10286 return false;
10287 break;
10288 case Intrinsic::ppc_altivec_vcmpgtub_p:
10289 CompareOpc = 518;
10290 isDot = true;
10291 break;
10292 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10293 CompareOpc = 582;
10294 isDot = true;
10295 break;
10296 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10297 CompareOpc = 646;
10298 isDot = true;
10299 break;
10300 case Intrinsic::ppc_altivec_vcmpgtud_p:
10301 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10302 CompareOpc = 711;
10303 isDot = true;
10304 } else
10305 return false;
10306 break;
10307
10308 case Intrinsic::ppc_altivec_vcmpequq:
10309 case Intrinsic::ppc_altivec_vcmpgtsq:
10310 case Intrinsic::ppc_altivec_vcmpgtuq:
10311 if (!Subtarget.isISA3_1())
10312 return false;
10313 switch (IntrinsicID) {
10314 default:
10315 llvm_unreachable("Unknown comparison intrinsic.");
10316 case Intrinsic::ppc_altivec_vcmpequq:
10317 CompareOpc = 455;
10318 break;
10319 case Intrinsic::ppc_altivec_vcmpgtsq:
10320 CompareOpc = 903;
10321 break;
10322 case Intrinsic::ppc_altivec_vcmpgtuq:
10323 CompareOpc = 647;
10324 break;
10325 }
10326 break;
10327
10328 // VSX predicate comparisons use the same infrastructure
10329 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10330 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10331 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10332 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10333 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10334 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10335 if (Subtarget.hasVSX()) {
10336 switch (IntrinsicID) {
10337 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10338 CompareOpc = 99;
10339 break;
10340 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10341 CompareOpc = 115;
10342 break;
10343 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10344 CompareOpc = 107;
10345 break;
10346 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10347 CompareOpc = 67;
10348 break;
10349 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10350 CompareOpc = 83;
10351 break;
10352 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10353 CompareOpc = 75;
10354 break;
10355 }
10356 isDot = true;
10357 } else
10358 return false;
10359 break;
10360
10361 // Normal Comparisons.
10362 case Intrinsic::ppc_altivec_vcmpbfp:
10363 CompareOpc = 966;
10364 break;
10365 case Intrinsic::ppc_altivec_vcmpeqfp:
10366 CompareOpc = 198;
10367 break;
10368 case Intrinsic::ppc_altivec_vcmpequb:
10369 CompareOpc = 6;
10370 break;
10371 case Intrinsic::ppc_altivec_vcmpequh:
10372 CompareOpc = 70;
10373 break;
10374 case Intrinsic::ppc_altivec_vcmpequw:
10375 CompareOpc = 134;
10376 break;
10377 case Intrinsic::ppc_altivec_vcmpequd:
10378 if (Subtarget.hasP8Altivec())
10379 CompareOpc = 199;
10380 else
10381 return false;
10382 break;
10383 case Intrinsic::ppc_altivec_vcmpneb:
10384 case Intrinsic::ppc_altivec_vcmpneh:
10385 case Intrinsic::ppc_altivec_vcmpnew:
10386 case Intrinsic::ppc_altivec_vcmpnezb:
10387 case Intrinsic::ppc_altivec_vcmpnezh:
10388 case Intrinsic::ppc_altivec_vcmpnezw:
10389 if (Subtarget.hasP9Altivec())
10390 switch (IntrinsicID) {
10391 default:
10392 llvm_unreachable("Unknown comparison intrinsic.");
10393 case Intrinsic::ppc_altivec_vcmpneb:
10394 CompareOpc = 7;
10395 break;
10396 case Intrinsic::ppc_altivec_vcmpneh:
10397 CompareOpc = 71;
10398 break;
10399 case Intrinsic::ppc_altivec_vcmpnew:
10400 CompareOpc = 135;
10401 break;
10402 case Intrinsic::ppc_altivec_vcmpnezb:
10403 CompareOpc = 263;
10404 break;
10405 case Intrinsic::ppc_altivec_vcmpnezh:
10406 CompareOpc = 327;
10407 break;
10408 case Intrinsic::ppc_altivec_vcmpnezw:
10409 CompareOpc = 391;
10410 break;
10411 }
10412 else
10413 return false;
10414 break;
10415 case Intrinsic::ppc_altivec_vcmpgefp:
10416 CompareOpc = 454;
10417 break;
10418 case Intrinsic::ppc_altivec_vcmpgtfp:
10419 CompareOpc = 710;
10420 break;
10421 case Intrinsic::ppc_altivec_vcmpgtsb:
10422 CompareOpc = 774;
10423 break;
10424 case Intrinsic::ppc_altivec_vcmpgtsh:
10425 CompareOpc = 838;
10426 break;
10427 case Intrinsic::ppc_altivec_vcmpgtsw:
10428 CompareOpc = 902;
10429 break;
10430 case Intrinsic::ppc_altivec_vcmpgtsd:
10431 if (Subtarget.hasP8Altivec())
10432 CompareOpc = 967;
10433 else
10434 return false;
10435 break;
10436 case Intrinsic::ppc_altivec_vcmpgtub:
10437 CompareOpc = 518;
10438 break;
10439 case Intrinsic::ppc_altivec_vcmpgtuh:
10440 CompareOpc = 582;
10441 break;
10442 case Intrinsic::ppc_altivec_vcmpgtuw:
10443 CompareOpc = 646;
10444 break;
10445 case Intrinsic::ppc_altivec_vcmpgtud:
10446 if (Subtarget.hasP8Altivec())
10447 CompareOpc = 711;
10448 else
10449 return false;
10450 break;
10451 case Intrinsic::ppc_altivec_vcmpequq_p:
10452 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10453 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10454 if (!Subtarget.isISA3_1())
10455 return false;
10456 switch (IntrinsicID) {
10457 default:
10458 llvm_unreachable("Unknown comparison intrinsic.");
10459 case Intrinsic::ppc_altivec_vcmpequq_p:
10460 CompareOpc = 455;
10461 break;
10462 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10463 CompareOpc = 903;
10464 break;
10465 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10466 CompareOpc = 647;
10467 break;
10468 }
10469 isDot = true;
10470 break;
10471 }
10472 return true;
10473}
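// The CompareOpc values returned above (6 for vcmpequb, 518 for vcmpgtub,
// and so on) correspond to the instructions' extended-opcode fields; the
// "_p" predicate forms also set isDot so the caller emits the record form
// (VCMP_rec) and reads CR6.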
10474
10475/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10476/// lower, do it, otherwise return null.
10477SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10478 SelectionDAG &DAG) const {
10479 unsigned IntrinsicID =
10480 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10481
10482 SDLoc dl(Op);
10483
10484 switch (IntrinsicID) {
10485 case Intrinsic::thread_pointer:
10486 // Reads the thread pointer register, used for __builtin_thread_pointer.
10487 if (Subtarget.isPPC64())
10488 return DAG.getRegister(PPC::X13, MVT::i64);
10489 return DAG.getRegister(PPC::R2, MVT::i32);
10490
10491 case Intrinsic::ppc_mma_disassemble_acc:
10492 case Intrinsic::ppc_vsx_disassemble_pair: {
10493 int NumVecs = 2;
10494 SDValue WideVec = Op.getOperand(1);
10495 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10496 NumVecs = 4;
10497 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10498 }
10499    SmallVector<SDValue, 4> RetOps;
10500    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10501 SDValue Extract = DAG.getNode(
10502 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10503 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10504 : VecNo,
10505 dl, getPointerTy(DAG.getDataLayout())));
10506 RetOps.push_back(Extract);
10507 }
10508 return DAG.getMergeValues(RetOps, dl);
10509 }
10510
10511 case Intrinsic::ppc_unpack_longdouble: {
10512 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10513 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10514 "Argument of long double unpack must be 0 or 1!");
10515 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10516 DAG.getConstant(!!(Idx->getSExtValue()), dl,
10517 Idx->getValueType(0)));
10518 }
10519
10520 case Intrinsic::ppc_compare_exp_lt:
10521 case Intrinsic::ppc_compare_exp_gt:
10522 case Intrinsic::ppc_compare_exp_eq:
10523 case Intrinsic::ppc_compare_exp_uo: {
10524 unsigned Pred;
10525 switch (IntrinsicID) {
10526 case Intrinsic::ppc_compare_exp_lt:
10527 Pred = PPC::PRED_LT;
10528 break;
10529 case Intrinsic::ppc_compare_exp_gt:
10530 Pred = PPC::PRED_GT;
10531 break;
10532 case Intrinsic::ppc_compare_exp_eq:
10533 Pred = PPC::PRED_EQ;
10534 break;
10535 case Intrinsic::ppc_compare_exp_uo:
10536 Pred = PPC::PRED_UN;
10537 break;
10538 }
10539 return SDValue(
10540 DAG.getMachineNode(
10541 PPC::SELECT_CC_I4, dl, MVT::i32,
10542 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10543 Op.getOperand(1), Op.getOperand(2)),
10544 0),
10545 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10546 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10547 0);
10548 }
10549 case Intrinsic::ppc_test_data_class_d:
10550 case Intrinsic::ppc_test_data_class_f: {
10551 unsigned CmprOpc = PPC::XSTSTDCDP;
10552 if (IntrinsicID == Intrinsic::ppc_test_data_class_f)
10553 CmprOpc = PPC::XSTSTDCSP;
10554 return SDValue(
10555 DAG.getMachineNode(
10556 PPC::SELECT_CC_I4, dl, MVT::i32,
10557 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10558 Op.getOperand(1)),
10559 0),
10560 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10561 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10562 0);
10563 }
10564 case Intrinsic::ppc_fnmsub: {
10565 EVT VT = Op.getOperand(1).getValueType();
10566 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10567 return DAG.getNode(
10568 ISD::FNEG, dl, VT,
10569 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10570 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10571 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10572 Op.getOperand(2), Op.getOperand(3));
10573 }
10574 case Intrinsic::ppc_convert_f128_to_ppcf128:
10575 case Intrinsic::ppc_convert_ppcf128_to_f128: {
10576 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10577 ? RTLIB::CONVERT_PPCF128_F128
10578 : RTLIB::CONVERT_F128_PPCF128;
10579 MakeLibCallOptions CallOptions;
10580 std::pair<SDValue, SDValue> Result =
10581 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10582 dl, SDValue());
10583 return Result.first;
10584 }
10585 case Intrinsic::ppc_maxfe:
10586 case Intrinsic::ppc_maxfl:
10587 case Intrinsic::ppc_maxfs:
10588 case Intrinsic::ppc_minfe:
10589 case Intrinsic::ppc_minfl:
10590 case Intrinsic::ppc_minfs: {
10591 EVT VT = Op.getValueType();
10592 assert(
10593 all_of(Op->ops().drop_front(4),
10594 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10595 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10596 (void)VT;
10597    ISD::CondCode CC = ISD::SETGT;
10598    if (IntrinsicID == Intrinsic::ppc_minfe ||
10599 IntrinsicID == Intrinsic::ppc_minfl ||
10600 IntrinsicID == Intrinsic::ppc_minfs)
10601 CC = ISD::SETLT;
10602 unsigned I = Op.getNumOperands() - 2, Cnt = I;
10603 SDValue Res = Op.getOperand(I);
10604 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10605 Res =
10606 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10607 }
10608 return Res;
10609 }
10610 }
10611
10612 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10613 // opcode number of the comparison.
10614 int CompareOpc;
10615 bool isDot;
10616 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10617 return SDValue(); // Don't custom lower most intrinsics.
10618
10619 // If this is a non-dot comparison, make the VCMP node and we are done.
10620 if (!isDot) {
10621 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10622 Op.getOperand(1), Op.getOperand(2),
10623 DAG.getConstant(CompareOpc, dl, MVT::i32));
10624 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10625 }
10626
10627 // Create the PPCISD altivec 'dot' comparison node.
10628 SDValue Ops[] = {
10629 Op.getOperand(2), // LHS
10630 Op.getOperand(3), // RHS
10631 DAG.getConstant(CompareOpc, dl, MVT::i32)
10632 };
10633 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10634 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10635
10636 // Now that we have the comparison, emit a copy from the CR to a GPR.
10637 // This is flagged to the above dot comparison.
10638 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10639 DAG.getRegister(PPC::CR6, MVT::i32),
10640 CompNode.getValue(1));
10641
10642 // Unpack the result based on how the target uses it.
10643 unsigned BitNo; // Bit # of CR6.
10644 bool InvertBit; // Invert result?
10645 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10646 default: // Can't happen, don't crash on invalid number though.
10647 case 0: // Return the value of the EQ bit of CR6.
10648 BitNo = 0; InvertBit = false;
10649 break;
10650 case 1: // Return the inverted value of the EQ bit of CR6.
10651 BitNo = 0; InvertBit = true;
10652 break;
10653 case 2: // Return the value of the LT bit of CR6.
10654 BitNo = 2; InvertBit = false;
10655 break;
10656 case 3: // Return the inverted value of the LT bit of CR6.
10657 BitNo = 2; InvertBit = true;
10658 break;
10659 }
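  // After MFOCRF, the four CR6 bits (LT, GT, EQ, SO) effectively sit in bits
  // 7..4 of the GPR, which is why the shift amount below is 8 - (3 - BitNo):
  // 5 for the EQ bit (BitNo 0) and 7 for the LT bit (BitNo 2).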
10660
10661 // Shift the bit into the low position.
10662 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10663 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10664 // Isolate the bit.
10665 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10666 DAG.getConstant(1, dl, MVT::i32));
10667
10668 // If we are supposed to, toggle the bit.
10669 if (InvertBit)
10670 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10671 DAG.getConstant(1, dl, MVT::i32));
10672 return Flags;
10673}
10674
10675SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10676 SelectionDAG &DAG) const {
10677 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10678 // the beginning of the argument list.
10679 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10680 SDLoc DL(Op);
10681 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10682 case Intrinsic::ppc_cfence: {
10683 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10684 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10685 SDValue Val = Op.getOperand(ArgStart + 1);
10686 EVT Ty = Val.getValueType();
10687 if (Ty == MVT::i128) {
10688 // FIXME: Testing one of two paired registers is sufficient to guarantee
10689 // ordering?
10690 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10691 }
10692 return SDValue(
10693 DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10694 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10695 Op.getOperand(0)),
10696 0);
10697 }
10698 default:
10699 break;
10700 }
10701 return SDValue();
10702}
10703
10704// Lower scalar BSWAP64 to xxbrd.
10705SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10706 SDLoc dl(Op);
10707 if (!Subtarget.isPPC64())
10708 return Op;
10709 // MTVSRDD
10710 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10711 Op.getOperand(0));
10712 // XXBRD
10713 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10714 // MFVSRD
10715 int VectorIndex = 0;
10716 if (Subtarget.isLittleEndian())
10717 VectorIndex = 1;
10718  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10719                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10720 return Op;
10721}
10722
10723// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10724// compared to a value that is atomically loaded (atomic loads zero-extend).
10725SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10726 SelectionDAG &DAG) const {
10727 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10728 "Expecting an atomic compare-and-swap here.");
10729 SDLoc dl(Op);
10730 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10731 EVT MemVT = AtomicNode->getMemoryVT();
10732 if (MemVT.getSizeInBits() >= 32)
10733 return Op;
10734
10735 SDValue CmpOp = Op.getOperand(2);
10736 // If this is already correctly zero-extended, leave it alone.
10737 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10738 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10739 return Op;
10740
10741 // Clear the high bits of the compare operand.
10742 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10743 SDValue NewCmpOp =
10744 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10745 DAG.getConstant(MaskVal, dl, MVT::i32));
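  // For an i8 MemVT this masks the compare value with 0xFF, and for i16 with
  // 0xFFFF, matching the zero-extension performed by the atomic load.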
10746
10747 // Replace the existing compare operand with the properly zero-extended one.
10748  SmallVector<SDValue, 4> Ops;
10749  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10750 Ops.push_back(AtomicNode->getOperand(i));
10751 Ops[2] = NewCmpOp;
10752 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10753  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10754  auto NodeTy =
10755      (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10756  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10757}
10758
10759SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
10760 SelectionDAG &DAG) const {
10761 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
10762 EVT MemVT = N->getMemoryVT();
10763 assert(MemVT.getSimpleVT() == MVT::i128 &&
10764 "Expect quadword atomic operations");
10765 SDLoc dl(N);
10766 unsigned Opc = N->getOpcode();
10767 switch (Opc) {
10768 case ISD::ATOMIC_LOAD: {
10769 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
10770    // lowered to ppc instructions by the pattern-matching instruction selector.
10771    SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
10772    SmallVector<SDValue, 4> Ops{
10773        N->getOperand(0),
10774 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
10775 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
10776 Ops.push_back(N->getOperand(I));
10777 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
10778 Ops, MemVT, N->getMemOperand());
10779 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
10780 SDValue ValHi =
10781 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
10782 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
10783 DAG.getConstant(64, dl, MVT::i32));
10784 SDValue Val =
10785 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
10786 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
10787 {Val, LoadedVal.getValue(2)});
10788 }
10789 case ISD::ATOMIC_STORE: {
10790 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
10791    // lowered to ppc instructions by the pattern-matching instruction selector.
10792 SDVTList Tys = DAG.getVTList(MVT::Other);
10793    SmallVector<SDValue, 4> Ops{
10794        N->getOperand(0),
10795 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
10796 SDValue Val = N->getOperand(2);
10797 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
10798 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
10799 DAG.getConstant(64, dl, MVT::i32));
10800 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
10801 Ops.push_back(ValLo);
10802 Ops.push_back(ValHi);
10803 Ops.push_back(N->getOperand(1));
10804 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
10805 N->getMemOperand());
10806 }
10807 default:
10808 llvm_unreachable("Unexpected atomic opcode");
10809 }
10810}
10811
10812SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10813 SelectionDAG &DAG) const {
10814 SDLoc dl(Op);
10815 // Create a stack slot that is 16-byte aligned.
10816  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10817  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10818 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10819 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10820
10821 // Store the input value into Value#0 of the stack slot.
10822 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10823                               MachinePointerInfo());
10824  // Load it out.
10825 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10826}
10827
10828SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10829 SelectionDAG &DAG) const {
10830 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10831 "Should only be called for ISD::INSERT_VECTOR_ELT");
10832
10833 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10834
10835 EVT VT = Op.getValueType();
10836 SDLoc dl(Op);
10837 SDValue V1 = Op.getOperand(0);
10838 SDValue V2 = Op.getOperand(1);
10839
10840 if (VT == MVT::v2f64 && C)
10841 return Op;
10842
10843 if (Subtarget.hasP9Vector()) {
10844 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
10845 // because on P10, it allows this specific insert_vector_elt load pattern to
10846 // utilize the refactored load and store infrastructure in order to exploit
10847 // prefixed loads.
10848 // On targets with inexpensive direct moves (Power9 and up), a
10849 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
10850 // load since a single precision load will involve conversion to double
10851 // precision on the load followed by another conversion to single precision.
10852 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
10853 (isa<LoadSDNode>(V2))) {
10854 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
10855 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
10856 SDValue InsVecElt =
10857 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
10858 BitcastLoad, Op.getOperand(2));
10859 return DAG.getBitcast(MVT::v4f32, InsVecElt);
10860 }
10861 }
10862
10863 if (Subtarget.isISA3_1()) {
10864 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
10865 return SDValue();
10866 // On P10, we have legal lowering for constant and variable indices for
10867 // all vectors.
10868 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10869 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
10870 return Op;
10871 }
10872
10873 // Before P10, we have legal lowering for constant indices but not for
10874 // variable ones.
10875 if (!C)
10876 return SDValue();
10877
10878 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10879 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10880 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10881 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10882 unsigned InsertAtElement = C->getZExtValue();
10883 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10884 if (Subtarget.isLittleEndian()) {
10885 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10886 }
10887 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10888 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10889 }
10890 return Op;
10891}
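// For example, inserting into element 3 of a v8i16 gives InsertAtByte =
// 3 * 2 = 6 on big endian; on little endian the byte numbering is mirrored,
// so InsertAtByte becomes (16 - 2) - 6 = 8.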
10892
10893SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10894 SelectionDAG &DAG) const {
10895 SDLoc dl(Op);
10896 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10897 SDValue LoadChain = LN->getChain();
10898 SDValue BasePtr = LN->getBasePtr();
10899 EVT VT = Op.getValueType();
10900
10901 if (VT != MVT::v256i1 && VT != MVT::v512i1)
10902 return Op;
10903
10904 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10905 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10906 // 2 or 4 vsx registers.
10907 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10908 "Type unsupported without MMA");
10909 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10910 "Type unsupported without paired vector support");
10911 Align Alignment = LN->getAlign();
10912  SmallVector<SDValue, 4> Loads;
10913  SmallVector<SDValue, 4> LoadChains;
10914 unsigned NumVecs = VT.getSizeInBits() / 128;
10915 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10916 SDValue Load =
10917 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10918 LN->getPointerInfo().getWithOffset(Idx * 16),
10919 commonAlignment(Alignment, Idx * 16),
10920 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10921 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10922 DAG.getConstant(16, dl, BasePtr.getValueType()));
10923 Loads.push_back(Load);
10924 LoadChains.push_back(Load.getValue(1));
10925 }
10926 if (Subtarget.isLittleEndian()) {
10927 std::reverse(Loads.begin(), Loads.end());
10928 std::reverse(LoadChains.begin(), LoadChains.end());
10929 }
10930 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10931 SDValue Value =
10932      DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10933                  dl, VT, Loads);
10934 SDValue RetOps[] = {Value, TF};
10935 return DAG.getMergeValues(RetOps, dl);
10936}
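// For example, a v512i1 accumulator load becomes four v16i8 loads at offsets
// 0, 16, 32 and 48; on little endian the loaded values are reversed before
// being combined so that register order matches the ACC_BUILD operand order.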
10937
10938SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10939 SelectionDAG &DAG) const {
10940 SDLoc dl(Op);
10941 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10942 SDValue StoreChain = SN->getChain();
10943 SDValue BasePtr = SN->getBasePtr();
10944 SDValue Value = SN->getValue();
10945 EVT StoreVT = Value.getValueType();
10946
10947 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10948 return Op;
10949
10950 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10951 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10952 // underlying registers individually.
10953 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10954 "Type unsupported without MMA");
10955 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10956 "Type unsupported without paired vector support");
10957 Align Alignment = SN->getAlign();
10958  SmallVector<SDValue, 4> Stores;
10959  unsigned NumVecs = 2;
10960 if (StoreVT == MVT::v512i1) {
10961    Value = DAG.getNode(PPCISD::XXMTACC, dl, MVT::v512i1, Value);
10962    NumVecs = 4;
10963 }
10964 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10965 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10966    SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10967                              DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
10968 SDValue Store =
10969 DAG.getStore(StoreChain, dl, Elt, BasePtr,
10970 SN->getPointerInfo().getWithOffset(Idx * 16),
10971 commonAlignment(Alignment, Idx * 16),
10972 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10973 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10974 DAG.getConstant(16, dl, BasePtr.getValueType()));
10975 Stores.push_back(Store);
10976 }
10977 SDValue TF = DAG.getTokenFactor(dl, Stores);
10978 return TF;
10979}
10980
10981SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10982 SDLoc dl(Op);
10983 if (Op.getValueType() == MVT::v4i32) {
10984 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10985
10986 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10987 // +16 as shift amt.
10988 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10989 SDValue RHSSwap = // = vrlw RHS, 16
10990 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10991
10992 // Shrinkify inputs to v8i16.
10993 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10994 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10995 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10996
10997 // Low parts multiplied together, generating 32-bit results (we ignore the
10998 // top parts).
10999 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11000 LHS, RHS, DAG, dl, MVT::v4i32);
11001
11002 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11003 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11004 // Shift the high parts up 16 bits.
11005 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11006 Neg16, DAG, dl);
11007 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11008 } else if (Op.getValueType() == MVT::v16i8) {
11009 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11010 bool isLittleEndian = Subtarget.isLittleEndian();
11011
11012 // Multiply the even 8-bit parts, producing 16-bit sums.
11013 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11014 LHS, RHS, DAG, dl, MVT::v8i16);
11015 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11016
11017 // Multiply the odd 8-bit parts, producing 16-bit sums.
11018 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11019 LHS, RHS, DAG, dl, MVT::v8i16);
11020 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11021
11022 // Merge the results together. Because vmuleub and vmuloub are
11023 // instructions with a big-endian bias, we must reverse the
11024 // element numbering and reverse the meaning of "odd" and "even"
11025 // when generating little endian code.
11026 int Ops[16];
11027 for (unsigned i = 0; i != 8; ++i) {
11028 if (isLittleEndian) {
11029 Ops[i*2 ] = 2*i;
11030 Ops[i*2+1] = 2*i+16;
11031 } else {
11032 Ops[i*2 ] = 2*i+1;
11033 Ops[i*2+1] = 2*i+1+16;
11034 }
11035 }
11036 if (isLittleEndian)
11037 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11038 else
11039 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11040 } else {
11041 llvm_unreachable("Unknown mul to lower!");
11042 }
11043}
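// The v4i32 path above uses the identity (with a = aH*2^16 + aL and
// b = bH*2^16 + bL): a*b mod 2^32 = aL*bL + ((aL*bH + aH*bL) << 16), where
// vmulouh produces the aL*bL terms and vmsumuhm of LHS with the
// halfword-rotated RHS produces aL*bH + aH*bL for each word.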
11044
11045SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11046 bool IsStrict = Op->isStrictFPOpcode();
11047 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11048 !Subtarget.hasP9Vector())
11049 return SDValue();
11050
11051 return Op;
11052}
11053
11054// Custom lowering for fpext v2f32 to v2f64
11055SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11056
11057 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11058 "Should only be called for ISD::FP_EXTEND");
11059
11060 // FIXME: handle extends from half precision float vectors on P9.
11061 // We only want to custom lower an extend from v2f32 to v2f64.
11062 if (Op.getValueType() != MVT::v2f64 ||
11063 Op.getOperand(0).getValueType() != MVT::v2f32)
11064 return SDValue();
11065
11066 SDLoc dl(Op);
11067 SDValue Op0 = Op.getOperand(0);
11068
11069 switch (Op0.getOpcode()) {
11070 default:
11071 return SDValue();
11072  case ISD::EXTRACT_SUBVECTOR: {
11073    assert(Op0.getNumOperands() == 2 &&
11074           isa<ConstantSDNode>(Op0->getOperand(1)) &&
11075           "Node should have 2 operands with second one being a constant!");
11076
11077 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11078 return SDValue();
11079
11080 // Custom lower is only done for high or low doubleword.
11081 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11082 if (Idx % 2 != 0)
11083 return SDValue();
11084
11085 // Since input is v4f32, at this point Idx is either 0 or 2.
11086 // Shift to get the doubleword position we want.
11087 int DWord = Idx >> 1;
11088
11089 // High and low word positions are different on little endian.
11090 if (Subtarget.isLittleEndian())
11091 DWord ^= 0x1;
11092
11093    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11094                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11095 }
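    // For example, extracting the subvector at Idx = 2 selects doubleword 1
    // in big-endian element order; on little endian the register halves are
    // swapped, so DWord ^= 0x1 yields doubleword 0 for FP_EXTEND_HALF.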
11096 case ISD::FADD:
11097 case ISD::FMUL:
11098 case ISD::FSUB: {
11099 SDValue NewLoad[2];
11100 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11101 // Ensure both input are loads.
11102 SDValue LdOp = Op0.getOperand(i);
11103 if (LdOp.getOpcode() != ISD::LOAD)
11104 return SDValue();
11105 // Generate new load node.
11106      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11107      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11108 NewLoad[i] = DAG.getMemIntrinsicNode(
11109          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11110          LD->getMemoryVT(), LD->getMemOperand());
11111 }
11112 SDValue NewOp =
11113 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11114 NewLoad[1], Op0.getNode()->getFlags());
11115 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11116 DAG.getConstant(0, dl, MVT::i32));
11117 }
11118 case ISD::LOAD: {
11119    LoadSDNode *LD = cast<LoadSDNode>(Op0.getNode());
11120    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11121 SDValue NewLd = DAG.getMemIntrinsicNode(
11122        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11123        LD->getMemoryVT(), LD->getMemOperand());
11124 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11125 DAG.getConstant(0, dl, MVT::i32));
11126 }
11127 }
11128  llvm_unreachable("ERROR: Should return for all cases within switch.");
11129}
11130
11131/// LowerOperation - Provide custom lowering hooks for some operations.
11132///
11133SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11134  switch (Op.getOpcode()) {
11135 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11136 case ISD::FPOW: return lowerPow(Op, DAG);
11137 case ISD::FSIN: return lowerSin(Op, DAG);
11138 case ISD::FCOS: return lowerCos(Op, DAG);
11139 case ISD::FLOG: return lowerLog(Op, DAG);
11140 case ISD::FLOG10: return lowerLog10(Op, DAG);
11141 case ISD::FEXP: return lowerExp(Op, DAG);
11142 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11143 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11144 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11145 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11146 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11147 case ISD::STRICT_FSETCC:
11148  case ISD::STRICT_FSETCCS:
11149 case ISD::SETCC: return LowerSETCC(Op, DAG);
11150 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11151 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11152
11153 case ISD::INLINEASM:
11154 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11155 // Variable argument lowering.
11156 case ISD::VASTART: return LowerVASTART(Op, DAG);
11157 case ISD::VAARG: return LowerVAARG(Op, DAG);
11158 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11159
11160 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11161 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11162 case ISD::GET_DYNAMIC_AREA_OFFSET:
11163 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11164
11165 // Exception handling lowering.
11166 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11167 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11168 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11169
11170 case ISD::LOAD: return LowerLOAD(Op, DAG);
11171 case ISD::STORE: return LowerSTORE(Op, DAG);
11172 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11173 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11174  case ISD::STRICT_FP_TO_UINT:
11175  case ISD::STRICT_FP_TO_SINT:
11176 case ISD::FP_TO_UINT:
11177 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11178  case ISD::STRICT_UINT_TO_FP:
11179  case ISD::STRICT_SINT_TO_FP:
11180 case ISD::UINT_TO_FP:
11181 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11182 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
11183
11184 // Lower 64-bit shifts.
11185 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11186 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11187 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11188
11189 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11190 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11191
11192 // Vector-related lowering.
11193 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11194 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11195 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11196 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11197 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11198 case ISD::MUL: return LowerMUL(Op, DAG);
11199 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11200  case ISD::STRICT_FP_ROUND:
11201 case ISD::FP_ROUND:
11202 return LowerFP_ROUND(Op, DAG);
11203 case ISD::ROTL: return LowerROTL(Op, DAG);
11204
11205 // For counter-based loop handling.
11206 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11207
11208 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11209
11210 // Frame & Return address.
11211 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11212 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11213
11214  case ISD::INTRINSIC_VOID:
11215 return LowerINTRINSIC_VOID(Op, DAG);
11216 case ISD::BSWAP:
11217 return LowerBSWAP(Op, DAG);
11218 case ISD::ATOMIC_CMP_SWAP:
11219 return LowerATOMIC_CMP_SWAP(Op, DAG);
11220 case ISD::ATOMIC_STORE:
11221 return LowerATOMIC_LOAD_STORE(Op, DAG);
11222 }
11223}
11224
11225void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11226                                           SmallVectorImpl<SDValue> &Results,
11227 SelectionDAG &DAG) const {
11228 SDLoc dl(N);
11229 switch (N->getOpcode()) {
11230 default:
11231 llvm_unreachable("Do not know how to custom type legalize this operation!");
11232 case ISD::ATOMIC_LOAD: {
11233 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11234 Results.push_back(Res);
11235 Results.push_back(Res.getValue(1));
11236 break;
11237 }
11238 case ISD::READCYCLECOUNTER: {
11239    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11240 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11241
11242 Results.push_back(
11243 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11244 Results.push_back(RTB.getValue(2));
11245 break;
11246 }
11247  case ISD::INTRINSIC_W_CHAIN: {
11248 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11249 Intrinsic::loop_decrement)
11250 break;
11251
11252 assert(N->getValueType(0) == MVT::i1 &&
11253 "Unexpected result type for CTR decrement intrinsic");
11254 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11255 N->getValueType(0));
11256 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11257 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11258 N->getOperand(1));
11259
11260 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11261 Results.push_back(NewInt.getValue(1));
11262 break;
11263 }
11264  case ISD::INTRINSIC_WO_CHAIN: {
11265 switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
11266 case Intrinsic::ppc_pack_longdouble:
11267 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11268 N->getOperand(2), N->getOperand(1)));
11269 break;
11270 case Intrinsic::ppc_maxfe:
11271 case Intrinsic::ppc_minfe:
11272 case Intrinsic::ppc_fnmsub:
11273 case Intrinsic::ppc_convert_f128_to_ppcf128:
11274 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11275 break;
11276 }
11277 break;
11278 }
11279 case ISD::VAARG: {
11280 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11281 return;
11282
11283 EVT VT = N->getValueType(0);
11284
11285 if (VT == MVT::i64) {
11286 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11287
11288 Results.push_back(NewNode);
11289 Results.push_back(NewNode.getValue(1));
11290 }
11291 return;
11292 }
11293  case ISD::STRICT_FP_TO_SINT:
11294  case ISD::STRICT_FP_TO_UINT:
11295 case ISD::FP_TO_SINT:
11296 case ISD::FP_TO_UINT: {
11297 // LowerFP_TO_INT() can only handle f32 and f64.
11298 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11299        MVT::ppcf128)
11300 return;
11301 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11302 Results.push_back(LoweredValue);
11303 if (N->isStrictFPOpcode())
11304 Results.push_back(LoweredValue.getValue(1));
11305 return;
11306 }
11307 case ISD::TRUNCATE: {
11308 if (!N->getValueType(0).isVector())
11309 return;
11310 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11311 if (Lowered)
11312 Results.push_back(Lowered);
11313 return;
11314 }
11315 case ISD::FSHL:
11316 case ISD::FSHR:
11317 // Don't handle funnel shifts here.
11318 return;
11319 case ISD::BITCAST:
11320 // Don't handle bitcast here.
11321 return;
11322 case ISD::FP_EXTEND:
11323 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11324 if (Lowered)
11325 Results.push_back(Lowered);
11326 return;
11327 }
11328}
11329
11330//===----------------------------------------------------------------------===//
11331// Other Lowering Code
11332//===----------------------------------------------------------------------===//
11333
11334static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11335 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11336 Function *Func = Intrinsic::getDeclaration(M, Id);
11337 return Builder.CreateCall(Func, {});
11338}
11339
11340// The mappings for emitLeading/TrailingFence are taken from
11341// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11342Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11343 Instruction *Inst,
11344 AtomicOrdering Ord) const {
11345  if (Ord == AtomicOrdering::SequentiallyConsistent)
11346 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11347 if (isReleaseOrStronger(Ord))
11348 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11349 return nullptr;
11350}
11351
11352Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11353 Instruction *Inst,
11354 AtomicOrdering Ord) const {
11355 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11356 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11357 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11358 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11359 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11360 return Builder.CreateCall(
11361          Intrinsic::getDeclaration(
11362 Builder.GetInsertBlock()->getParent()->getParent(),
11363 Intrinsic::ppc_cfence, {Inst->getType()}),
11364 {Inst});
11365 // FIXME: Can use isync for rmw operation.
11366 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11367 }
11368 return nullptr;
11369}
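// Taken together (a sketch of the mapping above): a seq_cst access gets a
// leading 'sync'; a release store gets a leading 'lwsync'; an acquire (or
// stronger) load or rmw gets a trailing 'lwsync', except that 64-bit loads
// use the ppc_cfence control-dependency idiom instead; everything else gets
// no fence.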
11370
11371MachineBasicBlock *
11372PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11373 unsigned AtomicSize,
11374 unsigned BinOpcode,
11375 unsigned CmpOpcode,
11376 unsigned CmpPred) const {
11377 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11378 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11379
11380 auto LoadMnemonic = PPC::LDARX;
11381 auto StoreMnemonic = PPC::STDCX;
11382 switch (AtomicSize) {
11383 default:
11384 llvm_unreachable("Unexpected size of atomic entity");
11385 case 1:
11386 LoadMnemonic = PPC::LBARX;
11387 StoreMnemonic = PPC::STBCX;
11388 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11389 break;
11390 case 2:
11391 LoadMnemonic = PPC::LHARX;
11392 StoreMnemonic = PPC::STHCX;
11393 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11394 break;
11395 case 4:
11396 LoadMnemonic = PPC::LWARX;
11397 StoreMnemonic = PPC::STWCX;
11398 break;
11399 case 8:
11400 LoadMnemonic = PPC::LDARX;
11401 StoreMnemonic = PPC::STDCX;
11402 break;
11403 }
11404
11405 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11406 MachineFunction *F = BB->getParent();
11407  MachineFunction::iterator It = ++BB->getIterator();
11408
11409 Register dest = MI.getOperand(0).getReg();
11410 Register ptrA = MI.getOperand(1).getReg();
11411 Register ptrB = MI.getOperand(2).getReg();
11412 Register incr = MI.getOperand(3).getReg();
11413 DebugLoc dl = MI.getDebugLoc();
11414
11415 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11416 MachineBasicBlock *loop2MBB =
11417 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11418 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11419 F->insert(It, loopMBB);
11420 if (CmpOpcode)
11421 F->insert(It, loop2MBB);
11422 F->insert(It, exitMBB);
11423 exitMBB->splice(exitMBB->begin(), BB,
11424 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11425  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11426
11427 MachineRegisterInfo &RegInfo = F->getRegInfo();
11428 Register TmpReg = (!BinOpcode) ? incr :
11429 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11430 : &PPC::GPRCRegClass);
11431
11432 // thisMBB:
11433 // ...
11434 // fallthrough --> loopMBB
11435 BB->addSuccessor(loopMBB);
11436
11437 // loopMBB:
11438 // l[wd]arx dest, ptr
11439 // add r0, dest, incr
11440 // st[wd]cx. r0, ptr
11441 // bne- loopMBB
11442 // fallthrough --> exitMBB
11443
11444 // For max/min...
11445 // loopMBB:
11446 // l[wd]arx dest, ptr
11447 // cmpl?[wd] incr, dest
11448 // bgt exitMBB
11449 // loop2MBB:
11450 // st[wd]cx. dest, ptr
11451 // bne- loopMBB
11452 // fallthrough --> exitMBB
11453
11454 BB = loopMBB;
11455 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11456 .addReg(ptrA).addReg(ptrB);
11457 if (BinOpcode)
11458 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11459 if (CmpOpcode) {
11460 // Signed comparisons of byte or halfword values must be sign-extended.
11461 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11462 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11463 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11464 ExtReg).addReg(dest);
11465 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11466 .addReg(incr).addReg(ExtReg);
11467 } else
11468 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11469 .addReg(incr).addReg(dest);
11470
11471 BuildMI(BB, dl, TII->get(PPC::BCC))
11472 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11473 BB->addSuccessor(loop2MBB);
11474 BB->addSuccessor(exitMBB);
11475 BB = loop2MBB;
11476 }
11477 BuildMI(BB, dl, TII->get(StoreMnemonic))
11478 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11479 BuildMI(BB, dl, TII->get(PPC::BCC))
11480 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11481 BB->addSuccessor(loopMBB);
11482 BB->addSuccessor(exitMBB);
11483
11484 // exitMBB:
11485 // ...
11486 BB = exitMBB;
11487 return BB;
11488}
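// For example, 'atomicrmw add ptr %p, i32 %v monotonic' reaches this function
// as ATOMIC_LOAD_ADD_I32 with BinOpcode == PPC::ADD4 (see the dispatch in
// EmitInstrWithCustomInserter below) and expands to the lwarx/add/stwcx./bne-
// loop sketched in the comment above.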
11489
11490static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
11491 switch(MI.getOpcode()) {
11492 default:
11493 return false;
11494 case PPC::COPY:
11495 return TII->isSignExtended(MI);
11496 case PPC::LHA:
11497 case PPC::LHA8:
11498 case PPC::LHAU:
11499 case PPC::LHAU8:
11500 case PPC::LHAUX:
11501 case PPC::LHAUX8:
11502 case PPC::LHAX:
11503 case PPC::LHAX8:
11504 case PPC::LWA:
11505 case PPC::LWAUX:
11506 case PPC::LWAX:
11507 case PPC::LWAX_32:
11508 case PPC::LWA_32:
11509 case PPC::PLHA:
11510 case PPC::PLHA8:
11511 case PPC::PLHA8pc:
11512 case PPC::PLHApc:
11513 case PPC::PLWA:
11514 case PPC::PLWA8:
11515 case PPC::PLWA8pc:
11516 case PPC::PLWApc:
11517 case PPC::EXTSB:
11518 case PPC::EXTSB8:
11519 case PPC::EXTSB8_32_64:
11520 case PPC::EXTSB8_rec:
11521 case PPC::EXTSB_rec:
11522 case PPC::EXTSH:
11523 case PPC::EXTSH8:
11524 case PPC::EXTSH8_32_64:
11525 case PPC::EXTSH8_rec:
11526 case PPC::EXTSH_rec:
11527 case PPC::EXTSW:
11528 case PPC::EXTSWSLI:
11529 case PPC::EXTSWSLI_32_64:
11530 case PPC::EXTSWSLI_32_64_rec:
11531 case PPC::EXTSWSLI_rec:
11532 case PPC::EXTSW_32:
11533 case PPC::EXTSW_32_64:
11534 case PPC::EXTSW_32_64_rec:
11535 case PPC::EXTSW_rec:
11536 case PPC::SRAW:
11537 case PPC::SRAWI:
11538 case PPC::SRAWI_rec:
11539 case PPC::SRAW_rec:
11540 return true;
11541 }
11542 return false;
11543}
11544
11545MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11546    MachineInstr &MI, MachineBasicBlock *BB,
11547 bool is8bit, // operation
11548 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11549 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11550 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11551
11552 // If this is a signed comparison and the value being compared is not known
11553 // to be sign extended, sign extend it here.
11554 DebugLoc dl = MI.getDebugLoc();
11555 MachineFunction *F = BB->getParent();
11556 MachineRegisterInfo &RegInfo = F->getRegInfo();
11557 Register incr = MI.getOperand(3).getReg();
11558 bool IsSignExtended = Register::isVirtualRegister(incr) &&
11559 isSignExtended(*RegInfo.getVRegDef(incr), TII);
11560
11561 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11562 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11563 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11564 .addReg(MI.getOperand(3).getReg());
11565 MI.getOperand(3).setReg(ValueReg);
11566 }
11567 // If we support part-word atomic mnemonics, just use them
11568 if (Subtarget.hasPartwordAtomics())
11569 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11570 CmpPred);
11571
11572  // In 64-bit mode we have to use 64 bits for addresses, even though
11573  // lwarx/stwcx. operate on 32-bit words. With the 32-bit atomics we can use
11574 // registers without caring whether they're 32 or 64, but here we're
11575 // doing actual arithmetic on the addresses.
11576 bool is64bit = Subtarget.isPPC64();
11577 bool isLittleEndian = Subtarget.isLittleEndian();
11578 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11579
11580 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11581  MachineFunction::iterator It = ++BB->getIterator();
11582
11583 Register dest = MI.getOperand(0).getReg();
11584 Register ptrA = MI.getOperand(1).getReg();
11585 Register ptrB = MI.getOperand(2).getReg();
11586
11587 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11588 MachineBasicBlock *loop2MBB =
11589 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11590 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11591 F->insert(It, loopMBB);
11592 if (CmpOpcode)
11593 F->insert(It, loop2MBB);
11594 F->insert(It, exitMBB);
11595 exitMBB->splice(exitMBB->begin(), BB,
11596 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11597  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11598
11599 const TargetRegisterClass *RC =
11600 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11601 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11602
11603 Register PtrReg = RegInfo.createVirtualRegister(RC);
11604 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11605 Register ShiftReg =
11606 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11607 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11608 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11609 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11610 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11611 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11612 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11613 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11614 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11615 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
11616 Register Ptr1Reg;
11617 Register TmpReg =
11618 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11619
11620 // thisMBB:
11621 // ...
11622 // fallthrough --> loopMBB
11623 BB->addSuccessor(loopMBB);
11624
11625 // The 4-byte load must be aligned, while a char or short may be
11626 // anywhere in the word. Hence all this nasty bookkeeping code.
11627 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11628 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11629 // xori shift, shift1, 24 [16]
11630 // rlwinm ptr, ptr1, 0, 0, 29
11631 // slw incr2, incr, shift
11632 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11633 // slw mask, mask2, shift
11634 // loopMBB:
11635 // lwarx tmpDest, ptr
11636 // add tmp, tmpDest, incr2
11637 // andc tmp2, tmpDest, mask
11638 // and tmp3, tmp, mask
11639 // or tmp4, tmp3, tmp2
11640 // stwcx. tmp4, ptr
11641 // bne- loopMBB
11642 // fallthrough --> exitMBB
11643 // srw SrwDest, tmpDest, shift
11644 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
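  // Worked example (illustrative): for a byte at address 0x1003 on a
  // big-endian target, the rlwinm yields shift1 = 3 * 8 = 24 and the xori
  // gives shift = 24 ^ 24 = 0, since byte 3 is the least significant byte of
  // its aligned word; on little endian shift1 is already the distance from
  // the LSB, so the xori is skipped.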
11645 if (ptrA != ZeroReg) {
11646 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11647 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11648 .addReg(ptrA)
11649 .addReg(ptrB);
11650 } else {
11651 Ptr1Reg = ptrB;
11652 }
11653  // We need to use a 32-bit subregister here to avoid a register class
11654  // mismatch in 64-bit mode.
11655 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11656 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11657 .addImm(3)
11658 .addImm(27)
11659 .addImm(is8bit ? 28 : 27);
11660 if (!isLittleEndian)
11661 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11662 .addReg(Shift1Reg)
11663 .addImm(is8bit ? 24 : 16);
11664 if (is64bit)
11665 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11666 .addReg(Ptr1Reg)
11667 .addImm(0)
11668 .addImm(61);
11669 else
11670 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11671 .addReg(Ptr1Reg)
11672 .addImm(0)
11673 .addImm(0)
11674 .addImm(29);
11675 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11676 if (is8bit)
11677 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11678 else {
11679 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11680 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11681 .addReg(Mask3Reg)
11682 .addImm(65535);
11683 }
11684 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11685 .addReg(Mask2Reg)
11686 .addReg(ShiftReg);
11687
11688 BB = loopMBB;
11689 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11690 .addReg(ZeroReg)
11691 .addReg(PtrReg);
11692 if (BinOpcode)
11693 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11694 .addReg(Incr2Reg)
11695 .addReg(TmpDestReg);
11696 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11697 .addReg(TmpDestReg)
11698 .addReg(MaskReg);
11699 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11700 if (CmpOpcode) {
11701 // For unsigned comparisons, we can directly compare the shifted values.
11702 // For signed comparisons we shift and sign extend.
11703 Register SReg = RegInfo.createVirtualRegister(GPRC);
11704 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11705 .addReg(TmpDestReg)
11706 .addReg(MaskReg);
11707 unsigned ValueReg = SReg;
11708 unsigned CmpReg = Incr2Reg;
11709 if (CmpOpcode == PPC::CMPW) {
11710 ValueReg = RegInfo.createVirtualRegister(GPRC);
11711 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11712 .addReg(SReg)
11713 .addReg(ShiftReg);
11714 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11715 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11716 .addReg(ValueReg);
11717 ValueReg = ValueSReg;
11718 CmpReg = incr;
11719 }
11720 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11721 .addReg(CmpReg)
11722 .addReg(ValueReg);
11723 BuildMI(BB, dl, TII->get(PPC::BCC))
11724 .addImm(CmpPred)
11725 .addReg(PPC::CR0)
11726 .addMBB(exitMBB);
11727 BB->addSuccessor(loop2MBB);
11728 BB->addSuccessor(exitMBB);
11729 BB = loop2MBB;
11730 }
11731 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11732 BuildMI(BB, dl, TII->get(PPC::STWCX))
11733 .addReg(Tmp4Reg)
11734 .addReg(ZeroReg)
11735 .addReg(PtrReg);
11736 BuildMI(BB, dl, TII->get(PPC::BCC))
11737      .addImm(PPC::PRED_NE)
11738 .addReg(PPC::CR0)
11739 .addMBB(loopMBB);
11740 BB->addSuccessor(loopMBB);
11741 BB->addSuccessor(exitMBB);
11742
11743 // exitMBB:
11744 // ...
11745 BB = exitMBB;
11746 // Since the shift amount is not a constant, we need to clear
11747 // the upper bits with a separate RLWINM.
11748 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
11749 .addReg(SrwDestReg)
11750 .addImm(0)
11751 .addImm(is8bit ? 24 : 16)
11752 .addImm(31);
11753 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
11754 .addReg(TmpDestReg)
11755 .addReg(ShiftReg);
11756 return BB;
11757}
11758
11759MachineBasicBlock *
11760PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11761 MachineBasicBlock *MBB) const {
11762 DebugLoc DL = MI.getDebugLoc();
11763 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11764 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11765
11766 MachineFunction *MF = MBB->getParent();
11767  MachineRegisterInfo &MRI = MF->getRegInfo();
11768
11769 const BasicBlock *BB = MBB->getBasicBlock();
11770  MachineFunction::iterator I = ++MBB->getIterator();
11771
11772 Register DstReg = MI.getOperand(0).getReg();
11773 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11774 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11775 Register mainDstReg = MRI.createVirtualRegister(RC);
11776 Register restoreDstReg = MRI.createVirtualRegister(RC);
11777
11778 MVT PVT = getPointerTy(MF->getDataLayout());
11779 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11780 "Invalid Pointer Size!");
11781 // For v = setjmp(buf), we generate
11782 //
11783 // thisMBB:
11784 // SjLjSetup mainMBB
11785 // bl mainMBB
11786 // v_restore = 1
11787 // b sinkMBB
11788 //
11789 // mainMBB:
11790 // buf[LabelOffset] = LR
11791 // v_main = 0
11792 //
11793 // sinkMBB:
11794 // v = phi(main, restore)
11795 //
11796
11797 MachineBasicBlock *thisMBB = MBB;
11798 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11799 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11800 MF->insert(I, mainMBB);
11801 MF->insert(I, sinkMBB);
11802
11803  MachineInstrBuilder MIB;
11804
11805 // Transfer the remainder of BB and its successor edges to sinkMBB.
11806 sinkMBB->splice(sinkMBB->begin(), MBB,
11807 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11808  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11809
11810 // Note that the structure of the jmp_buf used here is not compatible
11811 // with that used by libc, and is not designed to be. Specifically, it
11812 // stores only those 'reserved' registers that LLVM does not otherwise
11813 // understand how to spill. Also, by convention, by the time this
11814 // intrinsic is called, Clang has already stored the frame address in the
11815 // first slot of the buffer and stack address in the third. Following the
11816 // X86 target code, we'll store the jump address in the second slot. We also
11817 // need to save the TOC pointer (R2) to handle jumps between shared
11818 // libraries, and that will be stored in the fourth slot. The thread
11819 // identifier (R13) is not affected.
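  // The resulting layout, in PVT.getStoreSize()-byte slots:
  //   slot 0: frame address (stored by Clang)
  //   slot 1: jump (return) address  <- LabelOffset
  //   slot 2: stack address (stored by Clang)
  //   slot 3: TOC pointer (R2)       <- TOCOffset
  //   slot 4: base pointer           <- BPOffset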
11820
11821 // thisMBB:
11822 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11823 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11824 const int64_t BPOffset = 4 * PVT.getStoreSize();
11825
11826  // Prepare the IP in a register.
11827 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11828 Register LabelReg = MRI.createVirtualRegister(PtrRC);
11829 Register BufReg = MI.getOperand(1).getReg();
11830
11831 if (Subtarget.is64BitELFABI()) {
11832    setUsesTOCBasePtr(*MBB->getParent());
11833 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11834 .addReg(PPC::X2)
11835 .addImm(TOCOffset)
11836 .addReg(BufReg)
11837 .cloneMemRefs(MI);
11838 }
11839
11840 // Naked functions never have a base pointer, and so we use r1. For all
11841  // other functions, this decision must be delayed until PEI.
11842 unsigned BaseReg;
11843 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11844 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11845 else
11846 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11847
11848 MIB = BuildMI(*thisMBB, MI, DL,
11849 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11850 .addReg(BaseReg)
11851 .addImm(BPOffset)
11852 .addReg(BufReg)
11853 .cloneMemRefs(MI);
11854
11855 // Setup
11856 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11857 MIB.addRegMask(TRI->getNoPreservedMask());
11858
11859 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11860
11861 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11862 .addMBB(mainMBB);
11863 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11864
11865 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11866 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11867
11868 // mainMBB:
11869 // mainDstReg = 0
11870 MIB =
11871 BuildMI(mainMBB, DL,
11872 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11873
11874 // Store IP
11875 if (Subtarget.isPPC64()) {
11876 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11877 .addReg(LabelReg)
11878 .addImm(LabelOffset)
11879 .addReg(BufReg);
11880 } else {
11881 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11882 .addReg(LabelReg)
11883 .addImm(LabelOffset)
11884 .addReg(BufReg);
11885 }
11886 MIB.cloneMemRefs(MI);
11887
11888 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11889 mainMBB->addSuccessor(sinkMBB);
11890
11891 // sinkMBB:
11892 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11893 TII->get(PPC::PHI), DstReg)
11894 .addReg(mainDstReg).addMBB(mainMBB)
11895 .addReg(restoreDstReg).addMBB(thisMBB);
11896
11897 MI.eraseFromParent();
11898 return sinkMBB;
11899}
11900
11901MachineBasicBlock *
11902PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11903 MachineBasicBlock *MBB) const {
11904 DebugLoc DL = MI.getDebugLoc();
11905 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11906
11907 MachineFunction *MF = MBB->getParent();
11908  MachineRegisterInfo &MRI = MF->getRegInfo();
11909
11910 MVT PVT = getPointerTy(MF->getDataLayout());
11911 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11912 "Invalid Pointer Size!");
11913
11914 const TargetRegisterClass *RC =
11915 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11916 Register Tmp = MRI.createVirtualRegister(RC);
11917 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11918 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11919 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11920 unsigned BP =
11921 (PVT == MVT::i64)
11922 ? PPC::X30
11923 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11924 : PPC::R30);
11925
11926  MachineInstrBuilder MIB;
11927
11928 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11929 const int64_t SPOffset = 2 * PVT.getStoreSize();
11930 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11931 const int64_t BPOffset = 4 * PVT.getStoreSize();
11932
11933 Register BufReg = MI.getOperand(0).getReg();
11934
11935 // Reload FP (the jumped-to function may not have had a
11936 // frame pointer, and if so, then its r31 will be restored
11937 // as necessary).
11938 if (PVT == MVT::i64) {
11939 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11940 .addImm(0)
11941 .addReg(BufReg);
11942 } else {
11943 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11944 .addImm(0)
11945 .addReg(BufReg);
11946 }
11947 MIB.cloneMemRefs(MI);
11948
11949 // Reload IP
11950 if (PVT == MVT::i64) {
11951 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11952 .addImm(LabelOffset)
11953 .addReg(BufReg);
11954 } else {
11955 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11956 .addImm(LabelOffset)
11957 .addReg(BufReg);
11958 }
11959 MIB.cloneMemRefs(MI);
11960
11961 // Reload SP
11962 if (PVT == MVT::i64) {
11963 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11964 .addImm(SPOffset)
11965 .addReg(BufReg);
11966 } else {
11967 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11968 .addImm(SPOffset)
11969 .addReg(BufReg);
11970 }
11971 MIB.cloneMemRefs(MI);
11972
11973 // Reload BP
11974 if (PVT == MVT::i64) {
11975 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11976 .addImm(BPOffset)
11977 .addReg(BufReg);
11978 } else {
11979 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11980 .addImm(BPOffset)
11981 .addReg(BufReg);
11982 }
11983 MIB.cloneMemRefs(MI);
11984
11985 // Reload TOC
11986 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11987    setUsesTOCBasePtr(*MBB->getParent());
11988 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11989 .addImm(TOCOffset)
11990 .addReg(BufReg)
11991 .cloneMemRefs(MI);
11992 }
11993
11994 // Jump
11995 BuildMI(*MBB, MI, DL,
11996 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11997 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11998
11999 MI.eraseFromParent();
12000 return MBB;
12001}
12002
12003bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
12004 // If the function specifically requests inline stack probes, emit them.
12005 if (MF.getFunction().hasFnAttribute("probe-stack"))
12006 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12007 "inline-asm";
12008 return false;
12009}
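// For example, inline probes are emitted for a function carrying
//   attributes #0 = { "probe-stack"="inline-asm" }
// while any other value (or no attribute at all) leaves probing disabled.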
12010
12011unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
12012 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12013 unsigned StackAlign = TFI->getStackAlignment();
12014 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12015 "Unexpected stack alignment");
12016 // The default stack probe size is 4096 if the function has no
12017 // stack-probe-size attribute.
12018 unsigned StackProbeSize = 4096;
12019 const Function &Fn = MF.getFunction();
12020 if (Fn.hasFnAttribute("stack-probe-size"))
12021 Fn.getFnAttribute("stack-probe-size")
12022        .getValueAsString()
12023 .getAsInteger(0, StackProbeSize);
12024 // Round down to the stack alignment.
12025 StackProbeSize &= ~(StackAlign - 1);
12026 return StackProbeSize ? StackProbeSize : StackAlign;
12027}
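// For example, with a 16-byte stack alignment, "stack-probe-size"="4100"
// rounds down to 4096, and "8" rounds down to 0, in which case the stack
// alignment itself is used as the probe size.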
12028
12029// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12030// into three phases. In the first phase, it uses the pseudo instruction
12031// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
12032// and FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12033// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12034// of MaxCallFrameSize so that it can calculate the correct data area pointer.
12035MachineBasicBlock *
12036PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12037 MachineBasicBlock *MBB) const {
12038 const bool isPPC64 = Subtarget.isPPC64();
12039 MachineFunction *MF = MBB->getParent();
12040 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12041 DebugLoc DL = MI.getDebugLoc();
12042 const unsigned ProbeSize = getStackProbeSize(*MF);
12043 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12044  MachineRegisterInfo &MRI = MF->getRegInfo();
12045 // The CFG of probing stack looks as
12046 // +-----+
12047 // | MBB |
12048 // +--+--+
12049 // |
12050 // +----v----+
12051 // +--->+ TestMBB +---+
12052 // | +----+----+ |
12053 // | | |
12054 // | +-----v----+ |
12055 // +---+ BlockMBB | |
12056 // +----------+ |
12057 // |
12058 // +---------+ |
12059 // | TailMBB +<--+
12060 // +---------+
12061 // In MBB, calculate previous frame pointer and final stack pointer.
12062 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12063 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12064 // TailMBB is spliced via \p MI.
12065 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12066 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12067 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12068
12069  MachineFunction::iterator MBBIter = ++MBB->getIterator();
12070 MF->insert(MBBIter, TestMBB);
12071 MF->insert(MBBIter, BlockMBB);
12072 MF->insert(MBBIter, TailMBB);
12073
12074 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12075 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12076
12077 Register DstReg = MI.getOperand(0).getReg();
12078 Register NegSizeReg = MI.getOperand(1).getReg();
12079 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12080 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12081 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12082 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12083
12084  // Since the value of NegSizeReg might be realigned during prologue/epilogue
12085  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12086  // actual FramePointer and NegSize.
12087 unsigned ProbeOpc;
12088 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12089 ProbeOpc =
12090 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12091 else
12092    // By using the PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG variant,
12093    // ActualNegSizeReg and NegSizeReg will be allocated to the same physical
12094    // register, avoiding a redundant copy when the only use of NegSizeReg is
12095    // the current MI, which PREPARE_PROBED_ALLOCA will then replace.
12096 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12097 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12098 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12099 .addDef(ActualNegSizeReg)
12100 .addReg(NegSizeReg)
12101 .add(MI.getOperand(2))
12102 .add(MI.getOperand(3));
12103
12104  // Calculate the final stack pointer, which equals SP + ActualNegSize.
12105 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12106 FinalStackPtr)
12107 .addReg(SPReg)
12108 .addReg(ActualNegSizeReg);
12109
12110 // Materialize a scratch register for update.
12111 int64_t NegProbeSize = -(int64_t)ProbeSize;
12112 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12113 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12114 if (!isInt<16>(NegProbeSize)) {
12115 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12116 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12117 .addImm(NegProbeSize >> 16);
12118 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12119 ScratchReg)
12120 .addReg(TempReg)
12121 .addImm(NegProbeSize & 0xFFFF);
12122 } else
12123 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12124 .addImm(NegProbeSize);
12125
12126 {
12127 // Probing leading residual part.
12128 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12129 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12130 .addReg(ActualNegSizeReg)
12131 .addReg(ScratchReg);
12132 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12133 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12134 .addReg(Div)
12135 .addReg(ScratchReg);
12136 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12137 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12138 .addReg(Mul)
12139 .addReg(ActualNegSizeReg);
12140 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12141 .addReg(FramePointer)
12142 .addReg(SPReg)
12143 .addReg(NegMod);
12144 }
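  // Worked example (illustrative): with ActualNegSize = -5000 and
  // ProbeSize = 4096 (so ScratchReg = -4096): Div = 1, Mul = -4096, and
  // NegMod = -5000 - (-4096) = -904, so the store above probes the 904-byte
  // residual before the loop below touches each full 4096-byte block.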
12145
12146 {
12147    // The remaining part should be a multiple of ProbeSize.
12148 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12149 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12150 .addReg(SPReg)
12151 .addReg(FinalStackPtr);
12152 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12153        .addImm(PPC::PRED_EQ)
12154 .addReg(CmpResult)
12155 .addMBB(TailMBB);
12156 TestMBB->addSuccessor(BlockMBB);
12157 TestMBB->addSuccessor(TailMBB);
12158 }
12159
12160 {
12161 // Touch the block.
12162 // |P...|P...|P...
12163 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12164 .addReg(FramePointer)
12165 .addReg(SPReg)
12166 .addReg(ScratchReg);
12167 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12168 BlockMBB->addSuccessor(TestMBB);
12169 }
12170
12171  // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
12172  // use the DYNAREAOFFSET pseudo instruction to get the future result.
12173 Register MaxCallFrameSizeReg =
12174 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12175 BuildMI(TailMBB, DL,
12176 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12177 MaxCallFrameSizeReg)
12178 .add(MI.getOperand(2))
12179 .add(MI.getOperand(3));
12180 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12181 .addReg(SPReg)
12182 .addReg(MaxCallFrameSizeReg);
12183
12184 // Splice instructions after MI to TailMBB.
12185 TailMBB->splice(TailMBB->end(), MBB,
12186 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12187  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12188 MBB->addSuccessor(TestMBB);
12189
12190 // Delete the pseudo instruction.
12191 MI.eraseFromParent();
12192
12193 ++NumDynamicAllocaProbed;
12194 return TailMBB;
12195}
12196
12197MachineBasicBlock *
12198PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12199 MachineBasicBlock *BB) const {
12200 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12201 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12202 if (Subtarget.is64BitELFABI() &&
12203 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12204 !Subtarget.isUsingPCRelativeCalls()) {
12205 // Call lowering should have added an r2 operand to indicate a dependence
12206      // on the TOC base pointer value. It can't, however, because there is no
12207 // way to mark the dependence as implicit there, and so the stackmap code
12208 // will confuse it with a regular operand. Instead, add the dependence
12209 // here.
12210 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12211 }
12212
12213 return emitPatchPoint(MI, BB);
12214 }
12215
12216 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12217 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12218 return emitEHSjLjSetJmp(MI, BB);
12219 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12220 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12221 return emitEHSjLjLongJmp(MI, BB);
12222 }
12223
12224 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12225
12226 // To "insert" these instructions we actually have to insert their
12227 // control-flow patterns.
12228 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12229  MachineFunction::iterator It = ++BB->getIterator();
12230
12231 MachineFunction *F = BB->getParent();
12232 MachineRegisterInfo &MRI = F->getRegInfo();
12233
12234 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12235 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12236 MI.getOpcode() == PPC::SELECT_I8) {
12237    SmallVector<MachineOperand, 2> Cond;
12238 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12239 MI.getOpcode() == PPC::SELECT_CC_I8)
12240 Cond.push_back(MI.getOperand(4));
12241 else
12242      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12243 Cond.push_back(MI.getOperand(1));
12244
12245 DebugLoc dl = MI.getDebugLoc();
12246 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12247 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12248 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12249 MI.getOpcode() == PPC::SELECT_CC_F8 ||
12250 MI.getOpcode() == PPC::SELECT_CC_F16 ||
12251 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12252 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12253 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12254 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12255 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12256 MI.getOpcode() == PPC::SELECT_CC_SPE ||
12257 MI.getOpcode() == PPC::SELECT_F4 ||
12258 MI.getOpcode() == PPC::SELECT_F8 ||
12259 MI.getOpcode() == PPC::SELECT_F16 ||
12260 MI.getOpcode() == PPC::SELECT_SPE ||
12261 MI.getOpcode() == PPC::SELECT_SPE4 ||
12262 MI.getOpcode() == PPC::SELECT_VRRC ||
12263 MI.getOpcode() == PPC::SELECT_VSFRC ||
12264 MI.getOpcode() == PPC::SELECT_VSSRC ||
12265 MI.getOpcode() == PPC::SELECT_VSRC) {
12266 // The incoming instruction knows the destination vreg to set, the
12267 // condition code register to branch on, the true/false values to
12268 // select between, and a branch opcode to use.
12269
12270 // thisMBB:
12271 // ...
12272 // TrueVal = ...
12273 // cmpTY ccX, r1, r2
12274 // bCC copy1MBB
12275 // fallthrough --> copy0MBB
12276 MachineBasicBlock *thisMBB = BB;
12277 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12278 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12279 DebugLoc dl = MI.getDebugLoc();
12280 F->insert(It, copy0MBB);
12281 F->insert(It, sinkMBB);
12282
12283 // Transfer the remainder of BB and its successor edges to sinkMBB.
12284 sinkMBB->splice(sinkMBB->begin(), BB,
12285 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12286    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12287
12288 // Next, add the true and fallthrough blocks as its successors.
12289 BB->addSuccessor(copy0MBB);
12290 BB->addSuccessor(sinkMBB);
12291
12292 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12293 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12294 MI.getOpcode() == PPC::SELECT_F16 ||
12295 MI.getOpcode() == PPC::SELECT_SPE4 ||
12296 MI.getOpcode() == PPC::SELECT_SPE ||
12297 MI.getOpcode() == PPC::SELECT_VRRC ||
12298 MI.getOpcode() == PPC::SELECT_VSFRC ||
12299 MI.getOpcode() == PPC::SELECT_VSSRC ||
12300 MI.getOpcode() == PPC::SELECT_VSRC) {
12301 BuildMI(BB, dl, TII->get(PPC::BC))
12302 .addReg(MI.getOperand(1).getReg())
12303 .addMBB(sinkMBB);
12304 } else {
12305 unsigned SelectPred = MI.getOperand(4).getImm();
12306 BuildMI(BB, dl, TII->get(PPC::BCC))
12307 .addImm(SelectPred)
12308 .addReg(MI.getOperand(1).getReg())
12309 .addMBB(sinkMBB);
12310 }
12311
12312 // copy0MBB:
12313 // %FalseValue = ...
12314 // # fallthrough to sinkMBB
12315 BB = copy0MBB;
12316
12317 // Update machine-CFG edges
12318 BB->addSuccessor(sinkMBB);
12319
12320 // sinkMBB:
12321 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12322 // ...
12323 BB = sinkMBB;
12324 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12325 .addReg(MI.getOperand(3).getReg())
12326 .addMBB(copy0MBB)
12327 .addReg(MI.getOperand(2).getReg())
12328 .addMBB(thisMBB);
12329 } else if (MI.getOpcode() == PPC::ReadTB) {
12330 // To read the 64-bit time-base register on a 32-bit target, we read the
12331 // two halves. Should the counter have wrapped while it was being read, we
12332 // need to try again.
12333 // ...
12334 // readLoop:
12335 // mfspr Rx,TBU # load from TBU
12336 // mfspr Ry,TB # load from TB
12337 // mfspr Rz,TBU # load from TBU
12338 // cmpw crX,Rx,Rz # check if 'old'='new'
12339 // bne readLoop # branch if they're not equal
12340 // ...
12341
12342 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12343 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12344 DebugLoc dl = MI.getDebugLoc();
12345 F->insert(It, readMBB);
12346 F->insert(It, sinkMBB);
12347
12348 // Transfer the remainder of BB and its successor edges to sinkMBB.
12349 sinkMBB->splice(sinkMBB->begin(), BB,
12350 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12351    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12352
12353 BB->addSuccessor(readMBB);
12354 BB = readMBB;
12355
12356 MachineRegisterInfo &RegInfo = F->getRegInfo();
12357 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12358 Register LoReg = MI.getOperand(0).getReg();
12359 Register HiReg = MI.getOperand(1).getReg();
12360
12361 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12362 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12363 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12364
12365 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12366
12367 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12368 .addReg(HiReg)
12369 .addReg(ReadAgainReg);
12370 BuildMI(BB, dl, TII->get(PPC::BCC))
12371        .addImm(PPC::PRED_NE)
12372 .addReg(CmpReg)
12373 .addMBB(readMBB);
12374
12375 BB->addSuccessor(readMBB);
12376 BB->addSuccessor(sinkMBB);
12377 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12378 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12379 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12380 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12381 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12382 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12383 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12384 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12385
12386 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12387 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12388 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12389 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12390 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12391 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12392 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12393 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12394
12395 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12396 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12397 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12398 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12399 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12400 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12401 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12402 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12403
12404 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12405 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12406 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12407 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12408 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12409 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12410 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12411 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12412
12413 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12414 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12415 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12416 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12417 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12418 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12419 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12420 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12421
12422 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12423 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12424 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12425 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12426 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12427 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12428 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12429 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12430
12431 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12432 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12433 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12434 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12435 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12436 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12437 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12438 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12439
12440 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12441 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12442 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12443 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12444 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12445 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12446 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12447 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12448
12449 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12450 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12451 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12452 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12453 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12454 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12455 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12456 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12457
12458 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12459 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12460 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12461 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12462 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12463 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12464 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12465 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12466
12467 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12468 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12469 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12470 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12471 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12472 BB = EmitAtomicBinary(MI, BB, 4, 0);
12473 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12474 BB = EmitAtomicBinary(MI, BB, 8, 0);
12475 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12476 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12477 (Subtarget.hasPartwordAtomics() &&
12478 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12479 (Subtarget.hasPartwordAtomics() &&
12480 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12481 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12482
12483 auto LoadMnemonic = PPC::LDARX;
12484 auto StoreMnemonic = PPC::STDCX;
12485 switch (MI.getOpcode()) {
12486 default:
12487 llvm_unreachable("Compare and swap of unknown size");
12488 case PPC::ATOMIC_CMP_SWAP_I8:
12489 LoadMnemonic = PPC::LBARX;
12490 StoreMnemonic = PPC::STBCX;
12491      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12492 break;
12493 case PPC::ATOMIC_CMP_SWAP_I16:
12494 LoadMnemonic = PPC::LHARX;
12495 StoreMnemonic = PPC::STHCX;
12496      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12497 break;
12498 case PPC::ATOMIC_CMP_SWAP_I32:
12499 LoadMnemonic = PPC::LWARX;
12500 StoreMnemonic = PPC::STWCX;
12501 break;
12502 case PPC::ATOMIC_CMP_SWAP_I64:
12503 LoadMnemonic = PPC::LDARX;
12504 StoreMnemonic = PPC::STDCX;
12505 break;
12506 }
12507 Register dest = MI.getOperand(0).getReg();
12508 Register ptrA = MI.getOperand(1).getReg();
12509 Register ptrB = MI.getOperand(2).getReg();
12510 Register oldval = MI.getOperand(3).getReg();
12511 Register newval = MI.getOperand(4).getReg();
12512 DebugLoc dl = MI.getDebugLoc();
12513
12514 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12515 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12516 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12517 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12518 F->insert(It, loop1MBB);
12519 F->insert(It, loop2MBB);
12520 F->insert(It, midMBB);
12521 F->insert(It, exitMBB);
12522 exitMBB->splice(exitMBB->begin(), BB,
12523 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12524    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12525
12526 // thisMBB:
12527 // ...
12528 // fallthrough --> loopMBB
12529 BB->addSuccessor(loop1MBB);
12530
12531 // loop1MBB:
12532 // l[bhwd]arx dest, ptr
12533 // cmp[wd] dest, oldval
12534 // bne- midMBB
12535 // loop2MBB:
12536 // st[bhwd]cx. newval, ptr
12537 // bne- loopMBB
12538 // b exitBB
12539 // midMBB:
12540 // st[bhwd]cx. dest, ptr
12541 // exitBB:
12542 BB = loop1MBB;
12543 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12544 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12545 .addReg(oldval)
12546 .addReg(dest);
12547 BuildMI(BB, dl, TII->get(PPC::BCC))
12548        .addImm(PPC::PRED_NE)
12549 .addReg(PPC::CR0)
12550 .addMBB(midMBB);
12551 BB->addSuccessor(loop2MBB);
12552 BB->addSuccessor(midMBB);
12553
12554 BB = loop2MBB;
12555 BuildMI(BB, dl, TII->get(StoreMnemonic))
12556 .addReg(newval)
12557 .addReg(ptrA)
12558 .addReg(ptrB);
12559 BuildMI(BB, dl, TII->get(PPC::BCC))
12560        .addImm(PPC::PRED_NE)
12561 .addReg(PPC::CR0)
12562 .addMBB(loop1MBB);
12563 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12564 BB->addSuccessor(loop1MBB);
12565 BB->addSuccessor(exitMBB);
12566
12567 BB = midMBB;
12568 BuildMI(BB, dl, TII->get(StoreMnemonic))
12569 .addReg(dest)
12570 .addReg(ptrA)
12571 .addReg(ptrB);
12572 BB->addSuccessor(exitMBB);
12573
12574 // exitMBB:
12575 // ...
12576 BB = exitMBB;
12577 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12578 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12579 // We must use 64-bit registers for addresses when targeting 64-bit,
12580 // since we're actually doing arithmetic on them. Other registers
12581 // can be 32-bit.
12582 bool is64bit = Subtarget.isPPC64();
12583 bool isLittleEndian = Subtarget.isLittleEndian();
12584 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12585
12586 Register dest = MI.getOperand(0).getReg();
12587 Register ptrA = MI.getOperand(1).getReg();
12588 Register ptrB = MI.getOperand(2).getReg();
12589 Register oldval = MI.getOperand(3).getReg();
12590 Register newval = MI.getOperand(4).getReg();
12591 DebugLoc dl = MI.getDebugLoc();
12592
12593 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12594 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12595 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12596 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12597 F->insert(It, loop1MBB);
12598 F->insert(It, loop2MBB);
12599 F->insert(It, midMBB);
12600 F->insert(It, exitMBB);
12601 exitMBB->splice(exitMBB->begin(), BB,
12602 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12603    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12604
12605 MachineRegisterInfo &RegInfo = F->getRegInfo();
12606 const TargetRegisterClass *RC =
12607 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12608 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12609
12610 Register PtrReg = RegInfo.createVirtualRegister(RC);
12611 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12612 Register ShiftReg =
12613 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12614 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12615 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12616 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12617 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12618 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12619 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12620 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12621 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12622 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12623 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12624 Register Ptr1Reg;
12625 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12626 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12627 // thisMBB:
12628 // ...
12629 // fallthrough --> loopMBB
12630 BB->addSuccessor(loop1MBB);
12631
12632 // The 4-byte load must be aligned, while a char or short may be
12633 // anywhere in the word. Hence all this nasty bookkeeping code.
12634 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12635 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12636 // xori shift, shift1, 24 [16]
12637 // rlwinm ptr, ptr1, 0, 0, 29
12638 // slw newval2, newval, shift
12639    // slw oldval2, oldval, shift
12640 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12641 // slw mask, mask2, shift
12642 // and newval3, newval2, mask
12643 // and oldval3, oldval2, mask
12644 // loop1MBB:
12645 // lwarx tmpDest, ptr
12646 // and tmp, tmpDest, mask
12647 // cmpw tmp, oldval3
12648 // bne- midMBB
12649 // loop2MBB:
12650 // andc tmp2, tmpDest, mask
12651 // or tmp4, tmp2, newval3
12652 // stwcx. tmp4, ptr
12653 // bne- loop1MBB
12654 // b exitBB
12655 // midMBB:
12656 // stwcx. tmpDest, ptr
12657 // exitBB:
12658 // srw dest, tmpDest, shift
12659 if (ptrA != ZeroReg) {
12660 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12661 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12662 .addReg(ptrA)
12663 .addReg(ptrB);
12664 } else {
12665 Ptr1Reg = ptrB;
12666 }
12667
12668    // We need to use a 32-bit subregister here to avoid a register class
12669    // mismatch in 64-bit mode.
12670 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12671 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12672 .addImm(3)
12673 .addImm(27)
12674 .addImm(is8bit ? 28 : 27);
12675 if (!isLittleEndian)
12676 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12677 .addReg(Shift1Reg)
12678 .addImm(is8bit ? 24 : 16);
12679 if (is64bit)
12680 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12681 .addReg(Ptr1Reg)
12682 .addImm(0)
12683 .addImm(61);
12684 else
12685 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12686 .addReg(Ptr1Reg)
12687 .addImm(0)
12688 .addImm(0)
12689 .addImm(29);
12690 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12691 .addReg(newval)
12692 .addReg(ShiftReg);
12693 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12694 .addReg(oldval)
12695 .addReg(ShiftReg);
12696 if (is8bit)
12697 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12698 else {
12699 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12700 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12701 .addReg(Mask3Reg)
12702 .addImm(65535);
12703 }
12704 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12705 .addReg(Mask2Reg)
12706 .addReg(ShiftReg);
12707 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12708 .addReg(NewVal2Reg)
12709 .addReg(MaskReg);
12710 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12711 .addReg(OldVal2Reg)
12712 .addReg(MaskReg);
12713
12714 BB = loop1MBB;
12715 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12716 .addReg(ZeroReg)
12717 .addReg(PtrReg);
12718 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12719 .addReg(TmpDestReg)
12720 .addReg(MaskReg);
12721 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12722 .addReg(TmpReg)
12723 .addReg(OldVal3Reg);
12724 BuildMI(BB, dl, TII->get(PPC::BCC))
12725 .addImm(PPC::PRED_NE)
12726 .addReg(PPC::CR0)
12727 .addMBB(midMBB);
12728 BB->addSuccessor(loop2MBB);
12729 BB->addSuccessor(midMBB);
12730
12731 BB = loop2MBB;
12732 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12733 .addReg(TmpDestReg)
12734 .addReg(MaskReg);
12735 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12736 .addReg(Tmp2Reg)
12737 .addReg(NewVal3Reg);
12738 BuildMI(BB, dl, TII->get(PPC::STWCX))
12739 .addReg(Tmp4Reg)
12740 .addReg(ZeroReg)
12741 .addReg(PtrReg);
12742 BuildMI(BB, dl, TII->get(PPC::BCC))
12743 .addImm(PPC::PRED_NE)
12744 .addReg(PPC::CR0)
12745 .addMBB(loop1MBB);
12746 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12747 BB->addSuccessor(loop1MBB);
12748 BB->addSuccessor(exitMBB);
12749
12750 BB = midMBB;
12751 BuildMI(BB, dl, TII->get(PPC::STWCX))
12752 .addReg(TmpDestReg)
12753 .addReg(ZeroReg)
12754 .addReg(PtrReg);
12755 BB->addSuccessor(exitMBB);
12756
12757 // exitMBB:
12758 // ...
12759 BB = exitMBB;
12760 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12761 .addReg(TmpReg)
12762 .addReg(ShiftReg);
12763 } else if (MI.getOpcode() == PPC::FADDrtz) {
12764 // This pseudo performs an FADD with rounding mode temporarily forced
12765 // to round-to-zero. We emit this via custom inserter since the FPSCR
12766 // is not modeled at the SelectionDAG level.
12767 Register Dest = MI.getOperand(0).getReg();
12768 Register Src1 = MI.getOperand(1).getReg();
12769 Register Src2 = MI.getOperand(2).getReg();
12770 DebugLoc dl = MI.getDebugLoc();
12771
12772 MachineRegisterInfo &RegInfo = F->getRegInfo();
12773 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12774
12775 // Save FPSCR value.
12776 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12777
12778 // Set rounding mode to round-to-zero.
12779 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12780 .addImm(31)
12781 .addReg(PPC::RM, RegState::ImplicitDefine);
12782
12783 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12784 .addImm(30)
12785 .addReg(PPC::RM, RegState::ImplicitDefine);
12786
12787 // Perform addition.
12788 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12789 .addReg(Src1)
12790 .addReg(Src2);
12791 if (MI.getFlag(MachineInstr::NoFPExcept))
12792 MIB.setFlag(MachineInstr::NoFPExcept);
12793
12794 // Restore FPSCR value.
12795 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12796 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12797 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12798 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12799 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12800 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12801 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12802 ? PPC::ANDI8_rec
12803 : PPC::ANDI_rec;
12804 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12805 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12806
12807 MachineRegisterInfo &RegInfo = F->getRegInfo();
12808 Register Dest = RegInfo.createVirtualRegister(
12809 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12810
12811 DebugLoc Dl = MI.getDebugLoc();
12812 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12813 .addReg(MI.getOperand(1).getReg())
12814 .addImm(1);
12815 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12816 MI.getOperand(0).getReg())
12817 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12818 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12819 DebugLoc Dl = MI.getDebugLoc();
12820 MachineRegisterInfo &RegInfo = F->getRegInfo();
12821 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12822 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12823 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12824 MI.getOperand(0).getReg())
12825 .addReg(CRReg);
12826 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12827 DebugLoc Dl = MI.getDebugLoc();
12828 unsigned Imm = MI.getOperand(1).getImm();
12829 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12830 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12831 MI.getOperand(0).getReg())
12832 .addReg(PPC::CR0EQ);
12833 } else if (MI.getOpcode() == PPC::SETRNDi) {
12834 DebugLoc dl = MI.getDebugLoc();
12835 Register OldFPSCRReg = MI.getOperand(0).getReg();
12836
12837 // Save FPSCR value.
12838 if (MRI.use_empty(OldFPSCRReg))
12839 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12840 else
12841 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12842
12843 // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
12844 // the following settings:
12845 // 00 Round to nearest
12846 // 01 Round to 0
12847 // 10 Round to +inf
12848 // 11 Round to -inf
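// For example, an immediate of 1 (round to 0) is materialized below as
// "mtfsb1 31; mtfsb0 30", leaving 0b01 in FPSCR bits 62:63.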
12849
12850 // When the operand is an immediate, use its two least significant bits to
12851 // set bits 62:63 of FPSCR.
12852 unsigned Mode = MI.getOperand(1).getImm();
12853 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12854 .addImm(31)
12855 .addReg(PPC::RM, RegState::ImplicitDefine);
12856
12857 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12858 .addImm(30)
12859 .addReg(PPC::RM, RegState::ImplicitDefine);
12860 } else if (MI.getOpcode() == PPC::SETRND) {
12861 DebugLoc dl = MI.getDebugLoc();
12862
12863 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12864 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12865 // If the target doesn't have DirectMove, we should use the stack to do the
12866 // conversion, because the target doesn't have instructions like mtvsrd or
12867 // mfvsrd to do this conversion directly.
12868 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12869 if (Subtarget.hasDirectMove()) {
12870 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12871 .addReg(SrcReg);
12872 } else {
12873 // Use stack to do the register copy.
12874 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12875 MachineRegisterInfo &RegInfo = F->getRegInfo();
12876 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12877 if (RC == &PPC::F8RCRegClass) {
12878 // Copy register from F8RCRegClass to G8RCRegClass.
12879 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12880 "Unsupported RegClass.");
12881
12882 StoreOp = PPC::STFD;
12883 LoadOp = PPC::LD;
12884 } else {
12885 // Copy register from G8RCRegClass to F8RCRegClass.
12886 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12887 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12888 "Unsupported RegClass.");
12889 }
12890
12891 MachineFrameInfo &MFI = F->getFrameInfo();
12892 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12893
12894 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12895 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12896 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12897 MFI.getObjectAlign(FrameIdx));
12898
12899 // Store the SrcReg into the stack.
12900 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12901 .addReg(SrcReg)
12902 .addImm(0)
12903 .addFrameIndex(FrameIdx)
12904 .addMemOperand(MMOStore);
12905
12906 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12907 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12908 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12909 MFI.getObjectAlign(FrameIdx));
12910
12911 // Load from the stack where SrcReg is stored, and save to DestReg,
12912 // completing the register-class conversion from RegClass::SrcReg to
12913 // RegClass::DestReg.
12914 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12915 .addImm(0)
12916 .addFrameIndex(FrameIdx)
12917 .addMemOperand(MMOLoad);
12918 }
12919 };
12920
12921 Register OldFPSCRReg = MI.getOperand(0).getReg();
12922
12923 // Save FPSCR value.
12924 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12925
12926 // When the operand is a GPRC register, use its two least significant bits
12927 // and the mtfsf instruction to set bits 62:63 of FPSCR.
12928 //
12929 // copy OldFPSCRTmpReg, OldFPSCRReg
12930 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12931 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12932 // copy NewFPSCRReg, NewFPSCRTmpReg
12933 // mtfsf 255, NewFPSCRReg
12934 MachineOperand SrcOp = MI.getOperand(1);
12935 MachineRegisterInfo &RegInfo = F->getRegInfo();
12936 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12937
12938 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12939
12940 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12941 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12942
12943 // The first operand of INSERT_SUBREG should be a register that has
12944 // subregisters; since we only care about its register class, an
12945 // IMPLICIT_DEF register suffices.
12946 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12947 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12948 .addReg(ImDefReg)
12949 .add(SrcOp)
12950 .addImm(1);
12951
12952 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12953 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12954 .addReg(OldFPSCRTmpReg)
12955 .addReg(ExtSrcReg)
12956 .addImm(0)
12957 .addImm(62);
12958
12959 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12960 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12961
12962 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
12963 // 32:63 of FPSCR.
12964 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12965 .addImm(255)
12966 .addReg(NewFPSCRReg)
12967 .addImm(0)
12968 .addImm(0);
12969 } else if (MI.getOpcode() == PPC::SETFLM) {
12970 DebugLoc Dl = MI.getDebugLoc();
12971
12972 // Result of setflm is previous FPSCR content, so we need to save it first.
12973 Register OldFPSCRReg = MI.getOperand(0).getReg();
12974 if (MRI.use_empty(OldFPSCRReg))
12975 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12976 else
12977 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12978
12979 // Put bits in 32:63 to FPSCR.
12980 Register NewFPSCRReg = MI.getOperand(1).getReg();
12981 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12982 .addImm(255)
12983 .addReg(NewFPSCRReg)
12984 .addImm(0)
12985 .addImm(0);
12986 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12987 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12988 return emitProbedAlloca(MI, BB);
12989 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
12990 DebugLoc DL = MI.getDebugLoc();
12991 Register Src = MI.getOperand(2).getReg();
12992 Register Lo = MI.getOperand(0).getReg();
12993 Register Hi = MI.getOperand(1).getReg();
12994 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12995 .addDef(Lo)
12996 .addUse(Src, 0, PPC::sub_gp8_x1);
12997 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12998 .addDef(Hi)
12999 .addUse(Src, 0, PPC::sub_gp8_x0);
13000 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13001 MI.getOpcode() == PPC::STQX_PSEUDO) {
13002 DebugLoc DL = MI.getDebugLoc();
13003 // Ptr holds the sum of RA and RB and is used as the ptr_rc_no_r0 part
13004 // of LQ/STQ's memory operand, so it has to be in the
13005 // g8rc_and_g8rc_nox0 register class.
13006 Register Ptr =
13007 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13008 Register Val = MI.getOperand(0).getReg();
13009 Register RA = MI.getOperand(1).getReg();
13010 Register RB = MI.getOperand(2).getReg();
13011 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13012 BuildMI(*BB, MI, DL,
13013 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13014 : TII->get(PPC::STQ))
13015 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13016 .addImm(0)
13017 .addReg(Ptr);
13018 } else {
13019 llvm_unreachable("Unexpected instr type to insert");
13020 }
13021
13022 MI.eraseFromParent(); // The pseudo instruction is gone now.
13023 return BB;
13024}
13025
13026//===----------------------------------------------------------------------===//
13027// Target Optimization Hooks
13028//===----------------------------------------------------------------------===//
13029
13030static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13031 // For the estimates, convergence is quadratic, so we essentially double the
13032 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13033 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13034 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
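// As a worked example: starting from 2^-5, successive iterations give
// roughly 2^-10, 2^-20, and 2^-40, so f32 (24-bit significand) needs three
// steps and f64 one more; with hasRecipPrec(), 2^-14 squares to 2^-28, so
// one step suffices for f32 and two for f64, matching the values below.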
13035 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13036 if (VT.getScalarType() == MVT::f64)
13037 RefinementSteps++;
13038 return RefinementSteps;
13039}
13040
13041SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13042 const DenormalMode &Mode) const {
13043 // We only have VSX Vector Test for software Square Root.
13044 EVT VT = Op.getValueType();
13045 if (!isTypeLegal(MVT::i1) ||
13046 (VT != MVT::f64 &&
13047 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13048 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13049
13050 SDLoc DL(Op);
13051 // The output register of FTSQRT is CR field.
13052 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13053 // ftsqrt BF,FRB
13054 // Let e_b be the unbiased exponent of the double-precision
13055 // floating-point operand in register FRB.
13056 // fe_flag is set to 1 if either of the following conditions occurs.
13057 // - The double-precision floating-point operand in register FRB is a zero,
13058 // a NaN, or an infinity, or a negative value.
13059 // - e_b is less than or equal to -970.
13060 // Otherwise fe_flag is set to 0.
13061 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13062 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13063 // exponent is less than or equal to -970).
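// The EQ bit of that CR field, extracted below via the sub_eq subregister
// index, thus serves as the i1 "use the special-case result" test consumed
// by the software square-root expansion.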
13064 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13065 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13066 FTSQRT, SRIdxVal),
13067 0);
13068}
13069
13070SDValue
13071PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13072 SelectionDAG &DAG) const {
13073 // We only have VSX Vector Square Root.
13074 EVT VT = Op.getValueType();
13075 if (VT != MVT::f64 &&
13076 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13077 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13078
13079 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13080}
13081
13082SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13083 int Enabled, int &RefinementSteps,
13084 bool &UseOneConstNR,
13085 bool Reciprocal) const {
13086 EVT VT = Operand.getValueType();
13087 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13088 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13089 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13090 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13091 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13092 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13093
13094 // The Newton-Raphson computation with a single constant does not provide
13095 // enough accuracy on some CPUs.
13096 UseOneConstNR = !Subtarget.needsTwoConstNR();
13097 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13098 }
13099 return SDValue();
13100}
13101
13102SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13103 int Enabled,
13104 int &RefinementSteps) const {
13105 EVT VT = Operand.getValueType();
13106 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13107 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13108 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13109 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13110 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13111 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13112 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13113 }
13114 return SDValue();
13115}
13116
13117unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13118 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13119 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13120 // enabled for division), this functionality is redundant with the default
13121 // combiner logic (once the division -> reciprocal/multiply transformation
13122 // has taken place). As a result, this matters more for older cores than for
13123 // newer ones.
13124
13125 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13126 // reciprocal if there are two or more FDIVs (for embedded cores with only
13127 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
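// For example, with a threshold of 2 (the cores listed below), "a/d; b/d"
// can become "r = 1.0/d; a*r; b*r", while the default threshold of 3 leaves
// a pair of divisions alone unless a third one appears.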
13128 switch (Subtarget.getCPUDirective()) {
13129 default:
13130 return 3;
13131 case PPC::DIR_440:
13132 case PPC::DIR_A2:
13133 case PPC::DIR_E500:
13134 case PPC::DIR_E500mc:
13135 case PPC::DIR_E5500:
13136 return 2;
13137 }
13138}
13139
13140// isConsecutiveLSLoc needs to work even if all adds have not yet been
13141// collapsed, and so we need to look through chains of them.
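// For example, (add (add x, 8), 16) accumulates to Base = x, Offset = 24.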
13142static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13143 int64_t& Offset, SelectionDAG &DAG) {
13144 if (DAG.isBaseWithConstantOffset(Loc)) {
13145 Base = Loc.getOperand(0);
13146 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13147
13148 // The base might itself be a base plus an offset, and if so, accumulate
13149 // that as well.
13150 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13151 }
13152}
13153
13154static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13155 unsigned Bytes, int Dist,
13156 SelectionDAG &DAG) {
13157 if (VT.getSizeInBits() / 8 != Bytes)
13158 return false;
13159
13160 SDValue BaseLoc = Base->getBasePtr();
13161 if (Loc.getOpcode() == ISD::FrameIndex) {
13162 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13163 return false;
13164 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13165 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13166 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13167 int FS = MFI.getObjectSize(FI);
13168 int BFS = MFI.getObjectSize(BFI);
13169 if (FS != BFS || FS != (int)Bytes) return false;
13170 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13171 }
13172
13173 SDValue Base1 = Loc, Base2 = BaseLoc;
13174 int64_t Offset1 = 0, Offset2 = 0;
13175 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13176 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13177 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13178 return true;
13179
13180 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13181 const GlobalValue *GV1 = nullptr;
13182 const GlobalValue *GV2 = nullptr;
13183 Offset1 = 0;
13184 Offset2 = 0;
13185 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13186 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13187 if (isGA1 && isGA2 && GV1 == GV2)
13188 return Offset1 == (Offset2 + Dist*Bytes);
13189 return false;
13190}
13191
13192// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13193// not enforce equality of the chain operands.
13194static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13195 unsigned Bytes, int Dist,
13196 SelectionDAG &DAG) {
13197 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13198 EVT VT = LS->getMemoryVT();
13199 SDValue Loc = LS->getBasePtr();
13200 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13201 }
13202
13203 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13204 EVT VT;
13205 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13206 default: return false;
13207 case Intrinsic::ppc_altivec_lvx:
13208 case Intrinsic::ppc_altivec_lvxl:
13209 case Intrinsic::ppc_vsx_lxvw4x:
13210 case Intrinsic::ppc_vsx_lxvw4x_be:
13211 VT = MVT::v4i32;
13212 break;
13213 case Intrinsic::ppc_vsx_lxvd2x:
13214 case Intrinsic::ppc_vsx_lxvd2x_be:
13215 VT = MVT::v2f64;
13216 break;
13217 case Intrinsic::ppc_altivec_lvebx:
13218 VT = MVT::i8;
13219 break;
13220 case Intrinsic::ppc_altivec_lvehx:
13221 VT = MVT::i16;
13222 break;
13223 case Intrinsic::ppc_altivec_lvewx:
13224 VT = MVT::i32;
13225 break;
13226 }
13227
13228 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13229 }
13230
13231 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13232 EVT VT;
13233 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13234 default: return false;
13235 case Intrinsic::ppc_altivec_stvx:
13236 case Intrinsic::ppc_altivec_stvxl:
13237 case Intrinsic::ppc_vsx_stxvw4x:
13238 VT = MVT::v4i32;
13239 break;
13240 case Intrinsic::ppc_vsx_stxvd2x:
13241 VT = MVT::v2f64;
13242 break;
13243 case Intrinsic::ppc_vsx_stxvw4x_be:
13244 VT = MVT::v4i32;
13245 break;
13246 case Intrinsic::ppc_vsx_stxvd2x_be:
13247 VT = MVT::v2f64;
13248 break;
13249 case Intrinsic::ppc_altivec_stvebx:
13250 VT = MVT::i8;
13251 break;
13252 case Intrinsic::ppc_altivec_stvehx:
13253 VT = MVT::i16;
13254 break;
13255 case Intrinsic::ppc_altivec_stvewx:
13256 VT = MVT::i32;
13257 break;
13258 }
13259
13260 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13261 }
13262
13263 return false;
13264}
13265
13266 // Return true if there is a nearby consecutive load to the one provided
13267 // (regardless of alignment). We search up and down the chain, looking through
13268// token factors and other loads (but nothing else). As a result, a true result
13269// indicates that it is safe to create a new consecutive load adjacent to the
13270// load provided.
13271static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13272 SDValue Chain = LD->getChain();
13273 EVT VT = LD->getMemoryVT();
13274
13275 SmallSet<SDNode *, 16> LoadRoots;
13276 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13277 SmallSet<SDNode *, 16> Visited;
13278
13279 // First, search up the chain, branching to follow all token-factor operands.
13280 // If we find a consecutive load, then we're done, otherwise, record all
13281 // nodes just above the top-level loads and token factors.
13282 while (!Queue.empty()) {
13283 SDNode *ChainNext = Queue.pop_back_val();
13284 if (!Visited.insert(ChainNext).second)
13285 continue;
13286
13287 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13288 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13289 return true;
13290
13291 if (!Visited.count(ChainLD->getChain().getNode()))
13292 Queue.push_back(ChainLD->getChain().getNode());
13293 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13294 for (const SDUse &O : ChainNext->ops())
13295 if (!Visited.count(O.getNode()))
13296 Queue.push_back(O.getNode());
13297 } else
13298 LoadRoots.insert(ChainNext);
13299 }
13300
13301 // Second, search down the chain, starting from the top-level nodes recorded
13302 // in the first phase. These top-level nodes are the nodes just above all
13303 // loads and token factors. Starting with their uses, recursively look through
13304 // all loads (just the chain uses) and token factors to find a consecutive
13305 // load.
13306 Visited.clear();
13307 Queue.clear();
13308
13309 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
13310 IE = LoadRoots.end(); I != IE; ++I) {
13311 Queue.push_back(*I);
13312
13313 while (!Queue.empty()) {
13314 SDNode *LoadRoot = Queue.pop_back_val();
13315 if (!Visited.insert(LoadRoot).second)
13316 continue;
13317
13318 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13319 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13320 return true;
13321
13322 for (SDNode *U : LoadRoot->uses())
13323 if (((isa<MemSDNode>(U) &&
13324 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13325 U->getOpcode() == ISD::TokenFactor) &&
13326 !Visited.count(U))
13327 Queue.push_back(U);
13328 }
13329 }
13330
13331 return false;
13332}
13333
13334/// This function is called when we have proved that a SETCC node can be replaced
13335 /// by subtraction (and other supporting instructions) so that the result of
13336 /// the comparison is kept in a GPR instead of a CR. This function is purely for
13337/// codegen purposes and has some flags to guide the codegen process.
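/// For example, on a 64-bit target, i32 "setult a, b" becomes: zero extend
/// a and b to i64, subtract, and shift the sign bit down to bit 0
/// (srl ..., 63); the low bit is then 1 exactly when a < b. The Swap and
/// Complement flags encode SETULE, SETUGT and SETUGE the same way.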
13338static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13339 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13340 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13341
13342 // Zero extend the operands to the largest legal integer. Originally, they
13343 // must be of a strictly smaller size.
13344 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13345 DAG.getConstant(Size, DL, MVT::i32));
13346 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13347 DAG.getConstant(Size, DL, MVT::i32));
13348
13349 // Swap if needed. Depends on the condition code.
13350 if (Swap)
13351 std::swap(Op0, Op1);
13352
13353 // Subtract extended integers.
13354 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13355
13356 // Move the sign bit to the least significant position and zero out the rest.
13357 // Now the least significant bit carries the result of original comparison.
13358 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13359 DAG.getConstant(Size - 1, DL, MVT::i32));
13360 auto Final = Shifted;
13361
13362 // Complement the result if needed. Based on the condition code.
13363 if (Complement)
13364 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13365 DAG.getConstant(1, DL, MVT::i64));
13366
13367 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13368}
13369
13370SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13371 DAGCombinerInfo &DCI) const {
13372 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13373
13374 SelectionDAG &DAG = DCI.DAG;
13375 SDLoc DL(N);
13376
13377 // Size of integers being compared has a critical role in the following
13378 // analysis, so we prefer to do this when all types are legal.
13379 if (!DCI.isAfterLegalizeDAG())
13380 return SDValue();
13381
13382 // If all users of SETCC extend its value to a legal integer type
13383 // then we replace SETCC with a subtraction
13384 for (const SDNode *U : N->uses())
13385 if (U->getOpcode() != ISD::ZERO_EXTEND)
13386 return SDValue();
13387
13388 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13389 auto OpSize = N->getOperand(0).getValueSizeInBits();
13390
13391 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13392
13393 if (OpSize < Size) {
13394 switch (CC) {
13395 default: break;
13396 case ISD::SETULT:
13397 return generateEquivalentSub(N, Size, false, false, DL, DAG);
13398 case ISD::SETULE:
13399 return generateEquivalentSub(N, Size, true, true, DL, DAG);
13400 case ISD::SETUGT:
13401 return generateEquivalentSub(N, Size, false, true, DL, DAG);
13402 case ISD::SETUGE:
13403 return generateEquivalentSub(N, Size, true, false, DL, DAG);
13404 }
13405 }
13406
13407 return SDValue();
13408}
13409
13410SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13411 DAGCombinerInfo &DCI) const {
13412 SelectionDAG &DAG = DCI.DAG;
13413 SDLoc dl(N);
13414
13415 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13416 // If we're tracking CR bits, we need to be careful that we don't have:
13417 // trunc(binary-ops(zext(x), zext(y)))
13418 // or
13419 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13420 // such that we're unnecessarily moving things into GPRs when it would be
13421 // better to keep them in CR bits.
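// For example, trunc(and(zext(a), zext(b))) with i1 values a and b can be
// folded to and(a, b) computed directly in CR bits, avoiding the GPR round
// trip entirely.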
13422
13423 // Note that trunc here can be an actual i1 trunc, or can be the effective
13424 // truncation that comes from a setcc or select_cc.
13425 if (N->getOpcode() == ISD::TRUNCATE &&
13426 N->getValueType(0) != MVT::i1)
13427 return SDValue();
13428
13429 if (N->getOperand(0).getValueType() != MVT::i32 &&
13430 N->getOperand(0).getValueType() != MVT::i64)
13431 return SDValue();
13432
13433 if (N->getOpcode() == ISD::SETCC ||
13434 N->getOpcode() == ISD::SELECT_CC) {
13435 // If we're looking at a comparison, then we need to make sure that the
13436 // high bits (all except for the first) don't affect the result.
13437 ISD::CondCode CC =
13438 cast<CondCodeSDNode>(N->getOperand(
13439 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13440 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13441
13442 if (ISD::isSignedIntSetCC(CC)) {
13443 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13444 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13445 return SDValue();
13446 } else if (ISD::isUnsignedIntSetCC(CC)) {
13447 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13448 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13449 !DAG.MaskedValueIsZero(N->getOperand(1),
13450 APInt::getHighBitsSet(OpBits, OpBits-1)))
13451 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13452 : SDValue());
13453 } else {
13454 // This is neither a signed nor an unsigned comparison, just make sure
13455 // that the high bits are equal.
13456 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13457 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13458
13459 // We don't really care about what is known about the first bit (if
13460 // anything), so pretend that it is known zero for both to ensure they can
13461 // be compared as constants.
13462 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
13463 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
13464
13465 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
13466 Op1Known.getConstant() != Op2Known.getConstant())
13467 return SDValue();
13468 }
13469 }
13470
13471 // We now know that the higher-order bits are irrelevant, we just need to
13472 // make sure that all of the intermediate operations are bit operations, and
13473 // all inputs are extensions.
13474 if (N->getOperand(0).getOpcode() != ISD::AND &&
13475 N->getOperand(0).getOpcode() != ISD::OR &&
13476 N->getOperand(0).getOpcode() != ISD::XOR &&
13477 N->getOperand(0).getOpcode() != ISD::SELECT &&
13478 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13479 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13480 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13481 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13482 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13483 return SDValue();
13484
13485 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13486 N->getOperand(1).getOpcode() != ISD::AND &&
13487 N->getOperand(1).getOpcode() != ISD::OR &&
13488 N->getOperand(1).getOpcode() != ISD::XOR &&
13489 N->getOperand(1).getOpcode() != ISD::SELECT &&
13490 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13491 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13492 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13493 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13494 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13495 return SDValue();
13496
13497 SmallVector<SDValue, 4> Inputs;
13498 SmallVector<SDValue, 8> BinOps, PromOps;
13499 SmallPtrSet<SDNode *, 16> Visited;
13500
13501 for (unsigned i = 0; i < 2; ++i) {
13502 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13503 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13504 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13505 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13506 isa<ConstantSDNode>(N->getOperand(i)))
13507 Inputs.push_back(N->getOperand(i));
13508 else
13509 BinOps.push_back(N->getOperand(i));
13510
13511 if (N->getOpcode() == ISD::TRUNCATE)
13512 break;
13513 }
13514
13515 // Visit all inputs, collect all binary operations (and, or, xor and
13516 // select) that are all fed by extensions.
13517 while (!BinOps.empty()) {
13518 SDValue BinOp = BinOps.pop_back_val();
13519
13520 if (!Visited.insert(BinOp.getNode()).second)
13521 continue;
13522
13523 PromOps.push_back(BinOp);
13524
13525 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13526 // The condition of the select is not promoted.
13527 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13528 continue;
13529 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13530 continue;
13531
13532 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13533 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13534 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13535 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13536 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13537 Inputs.push_back(BinOp.getOperand(i));
13538 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13539 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13540 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13541 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13542 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13543 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13544 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13545 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13546 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13547 BinOps.push_back(BinOp.getOperand(i));
13548 } else {
13549 // We have an input that is not an extension or another binary
13550 // operation; we'll abort this transformation.
13551 return SDValue();
13552 }
13553 }
13554 }
13555
13556 // Make sure that this is a self-contained cluster of operations (which
13557 // is not quite the same thing as saying that everything has only one
13558 // use).
13559 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13560 if (isa<ConstantSDNode>(Inputs[i]))
13561 continue;
13562
13563 for (const SDNode *User : Inputs[i].getNode()->uses()) {
13564 if (User != N && !Visited.count(User))
13565 return SDValue();
13566
13567 // Make sure that we're not going to promote the non-output-value
13568 // operand(s) or SELECT or SELECT_CC.
13569 // FIXME: Although we could sometimes handle this, and it does occur in
13570 // practice that one of the condition inputs to the select is also one of
13571 // the outputs, we currently can't deal with this.
13572 if (User->getOpcode() == ISD::SELECT) {
13573 if (User->getOperand(0) == Inputs[i])
13574 return SDValue();
13575 } else if (User->getOpcode() == ISD::SELECT_CC) {
13576 if (User->getOperand(0) == Inputs[i] ||
13577 User->getOperand(1) == Inputs[i])
13578 return SDValue();
13579 }
13580 }
13581 }
13582
13583 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13584 for (const SDNode *User : PromOps[i].getNode()->uses()) {
13585 if (User != N && !Visited.count(User))
13586 return SDValue();
13587
13588 // Make sure that we're not going to promote the non-output-value
13589 // operand(s) or SELECT or SELECT_CC.
13590 // FIXME: Although we could sometimes handle this, and it does occur in
13591 // practice that one of the condition inputs to the select is also one of
13592 // the outputs, we currently can't deal with this.
13593 if (User->getOpcode() == ISD::SELECT) {
13594 if (User->getOperand(0) == PromOps[i])
13595 return SDValue();
13596 } else if (User->getOpcode() == ISD::SELECT_CC) {
13597 if (User->getOperand(0) == PromOps[i] ||
13598 User->getOperand(1) == PromOps[i])
13599 return SDValue();
13600 }
13601 }
13602 }
13603
13604 // Replace all inputs with the extension operand.
13605 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13606 // Constants may have users outside the cluster of to-be-promoted nodes,
13607 // and so we need to replace those as we do the promotions.
13608 if (isa<ConstantSDNode>(Inputs[i]))
13609 continue;
13610 else
13611 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13612 }
13613
13614 std::list<HandleSDNode> PromOpHandles;
13615 for (auto &PromOp : PromOps)
13616 PromOpHandles.emplace_back(PromOp);
13617
13618 // Replace all operations (these are all the same, but have a different
13619 // (i1) return type). DAG.getNode will validate that the types of
13620 // a binary operator match, so go through the list in reverse so that
13621 // we've likely promoted both operands first. Any intermediate truncations or
13622 // extensions disappear.
13623 while (!PromOpHandles.empty()) {
13624 SDValue PromOp = PromOpHandles.back().getValue();
13625 PromOpHandles.pop_back();
13626
13627 if (PromOp.getOpcode() == ISD::TRUNCATE ||
13628 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13629 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13630 PromOp.getOpcode() == ISD::ANY_EXTEND) {
13631 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13632 PromOp.getOperand(0).getValueType() != MVT::i1) {
13633 // The operand is not yet ready (see comment below).
13634 PromOpHandles.emplace_front(PromOp);
13635 continue;
13636 }
13637
13638 SDValue RepValue = PromOp.getOperand(0);
13639 if (isa<ConstantSDNode>(RepValue))
13640 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13641
13642 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13643 continue;
13644 }
13645
13646 unsigned C;
13647 switch (PromOp.getOpcode()) {
13648 default: C = 0; break;
13649 case ISD::SELECT: C = 1; break;
13650 case ISD::SELECT_CC: C = 2; break;
13651 }
13652
13653 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13654 PromOp.getOperand(C).getValueType() != MVT::i1) ||
13655 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13656 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13657 // The to-be-promoted operands of this node have not yet been
13658 // promoted (this should be rare because we're going through the
13659 // list backward, but if one of the operands has several users in
13660 // this cluster of to-be-promoted nodes, it is possible).
13661 PromOpHandles.emplace_front(PromOp);
13662 continue;
13663 }
13664
13665 SmallVector<SDValue, 2> Ops(PromOp.getNode()->op_begin(),
13666 PromOp.getNode()->op_end());
13667
13668 // If there are any constant inputs, make sure they're replaced now.
13669 for (unsigned i = 0; i < 2; ++i)
13670 if (isa<ConstantSDNode>(Ops[C+i]))
13671 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13672
13673 DAG.ReplaceAllUsesOfValueWith(PromOp,
13674 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13675 }
13676
13677 // Now we're left with the initial truncation itself.
13678 if (N->getOpcode() == ISD::TRUNCATE)
13679 return N->getOperand(0);
13680
13681 // Otherwise, this is a comparison. The operands to be compared have just
13682 // changed type (to i1), but everything else is the same.
13683 return SDValue(N, 0);
13684}
13685
13686SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13687 DAGCombinerInfo &DCI) const {
13688 SelectionDAG &DAG = DCI.DAG;
13689 SDLoc dl(N);
13690
13691 // If we're tracking CR bits, we need to be careful that we don't have:
13692 // zext(binary-ops(trunc(x), trunc(y)))
13693 // or
13694 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13695 // such that we're unnecessarily moving things into CR bits that can more
13696 // efficiently stay in GPRs. Note that if we're not certain that the high
13697 // bits are set as required by the final extension, we still may need to do
13698 // some masking to get the proper behavior.
13699
13700 // This same functionality is important on PPC64 when dealing with
13701 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13702 // the return values of functions. Because it is so similar, it is handled
13703 // here as well.
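// For example, zext(or(trunc(x), trunc(y))) of i64 values x and y can be
// rewritten as or(x, y), followed at most by a final mask (for zext) or a
// shift pair (for sext) when the high bits are not already correct.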
13704
13705 if (N->getValueType(0) != MVT::i32 &&
13706 N->getValueType(0) != MVT::i64)
13707 return SDValue();
13708
13709 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13710 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13711 return SDValue();
13712
13713 if (N->getOperand(0).getOpcode() != ISD::AND &&
13714 N->getOperand(0).getOpcode() != ISD::OR &&
13715 N->getOperand(0).getOpcode() != ISD::XOR &&
13716 N->getOperand(0).getOpcode() != ISD::SELECT &&
13717 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13718 return SDValue();
13719
13719
13720 SmallVector<SDValue, 4> Inputs;
13721 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13722 SmallPtrSet<SDNode *, 16> Visited;
13723
13724 // Visit all inputs, collect all binary operations (and, or, xor and
13725 // select) that are all fed by truncations.
13726 while (!BinOps.empty()) {
13727 SDValue BinOp = BinOps.pop_back_val();
13728
13729 if (!Visited.insert(BinOp.getNode()).second)
13730 continue;
13731
13732 PromOps.push_back(BinOp);
13733
13734 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13735 // The condition of the select is not promoted.
13736 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13737 continue;
13738 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13739 continue;
13740
13741 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13742 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13743 Inputs.push_back(BinOp.getOperand(i));
13744 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13745 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13746 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13747 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13748 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13749 BinOps.push_back(BinOp.getOperand(i));
13750 } else {
13751 // We have an input that is not a truncation or another binary
13752 // operation; we'll abort this transformation.
13753 return SDValue();
13754 }
13755 }
13756 }
13757
13758 // The operands of a select that must be truncated when the select is
13759 // promoted because the operand is actually part of the to-be-promoted set.
13760 DenseMap<SDNode *, EVT> SelectTruncOp[2];
13761
13762 // Make sure that this is a self-contained cluster of operations (which
13763 // is not quite the same thing as saying that everything has only one
13764 // use).
13765 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13766 if (isa<ConstantSDNode>(Inputs[i]))
13767 continue;
13768
13769 for (SDNode *User : Inputs[i].getNode()->uses()) {
13770 if (User != N && !Visited.count(User))
13771 return SDValue();
13772
13773 // If we're going to promote the non-output-value operand(s) or SELECT or
13774 // SELECT_CC, record them for truncation.
13775 if (User->getOpcode() == ISD::SELECT) {
13776 if (User->getOperand(0) == Inputs[i])
13777 SelectTruncOp[0].insert(std::make_pair(User,
13778 User->getOperand(0).getValueType()));
13779 } else if (User->getOpcode() == ISD::SELECT_CC) {
13780 if (User->getOperand(0) == Inputs[i])
13781 SelectTruncOp[0].insert(std::make_pair(User,
13782 User->getOperand(0).getValueType()));
13783 if (User->getOperand(1) == Inputs[i])
13784 SelectTruncOp[1].insert(std::make_pair(User,
13785 User->getOperand(1).getValueType()));
13786 }
13787 }
13788 }
13789
13790 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13791 for (SDNode *User : PromOps[i].getNode()->uses()) {
13792 if (User != N && !Visited.count(User))
13793 return SDValue();
13794
13795 // If we're going to promote the non-output-value operand(s) or SELECT or
13796 // SELECT_CC, record them for truncation.
13797 if (User->getOpcode() == ISD::SELECT) {
13798 if (User->getOperand(0) == PromOps[i])
13799 SelectTruncOp[0].insert(std::make_pair(User,
13800 User->getOperand(0).getValueType()));
13801 } else if (User->getOpcode() == ISD::SELECT_CC) {
13802 if (User->getOperand(0) == PromOps[i])
13803 SelectTruncOp[0].insert(std::make_pair(User,
13804 User->getOperand(0).getValueType()));
13805 if (User->getOperand(1) == PromOps[i])
13806 SelectTruncOp[1].insert(std::make_pair(User,
13807 User->getOperand(1).getValueType()));
13808 }
13809 }
13810 }
13811
13812 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13813 bool ReallyNeedsExt = false;
13814 if (N->getOpcode() != ISD::ANY_EXTEND) {
13815 // If all of the inputs are not already sign/zero extended, then
13816 // we'll still need to do that at the end.
13817 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13818 if (isa<ConstantSDNode>(Inputs[i]))
13819 continue;
13820
13821 unsigned OpBits =
13822 Inputs[i].getOperand(0).getValueSizeInBits();
13823 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13824
13825 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13826 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13827 APInt::getHighBitsSet(OpBits,
13828 OpBits-PromBits))) ||
13829 (N->getOpcode() == ISD::SIGN_EXTEND &&
13830 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13831 (OpBits-(PromBits-1)))) {
13832 ReallyNeedsExt = true;
13833 break;
13834 }
13835 }
13836 }
13837
13838 // Replace all inputs, either with the truncation operand, or a
13839 // truncation or extension to the final output type.
13840 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13841 // Constant inputs need to be replaced with the to-be-promoted nodes that
13842 // use them because they might have users outside of the cluster of
13843 // promoted nodes.
13844 if (isa<ConstantSDNode>(Inputs[i]))
13845 continue;
13846
13847 SDValue InSrc = Inputs[i].getOperand(0);
13848 if (Inputs[i].getValueType() == N->getValueType(0))
13849 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13850 else if (N->getOpcode() == ISD::SIGN_EXTEND)
13851 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13852 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13853 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13854 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13855 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13856 else
13857 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13858 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13859 }
13860
13861 std::list<HandleSDNode> PromOpHandles;
13862 for (auto &PromOp : PromOps)
13863 PromOpHandles.emplace_back(PromOp);
13864
13865 // Replace all operations (these are all the same, but have a different
13866 // (promoted) return type). DAG.getNode will validate that the types of
13867 // a binary operator match, so go through the list in reverse so that
13868 // we've likely promoted both operands first.
13869 while (!PromOpHandles.empty()) {
13870 SDValue PromOp = PromOpHandles.back().getValue();
13871 PromOpHandles.pop_back();
13872
13873 unsigned C;
13874 switch (PromOp.getOpcode()) {
13875 default: C = 0; break;
13876 case ISD::SELECT: C = 1; break;
13877 case ISD::SELECT_CC: C = 2; break;
13878 }
13879
13880 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13881 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13882 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13883 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13884 // The to-be-promoted operands of this node have not yet been
13885 // promoted (this should be rare because we're going through the
13886 // list backward, but if one of the operands has several users in
13887 // this cluster of to-be-promoted nodes, it is possible).
13888 PromOpHandles.emplace_front(PromOp);
13889 continue;
13890 }
13891
13892 // For SELECT and SELECT_CC nodes, we do a similar check for any
13893 // to-be-promoted comparison inputs.
13894 if (PromOp.getOpcode() == ISD::SELECT ||
13895 PromOp.getOpcode() == ISD::SELECT_CC) {
13896 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13897 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13898 (SelectTruncOp[1].count(PromOp.getNode()) &&
13899 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13900 PromOpHandles.emplace_front(PromOp);
13901 continue;
13902 }
13903 }
13904
13905 SmallVector<SDValue, 2> Ops(PromOp.getNode()->op_begin(),
13906 PromOp.getNode()->op_end());
13907
13908 // If this node has constant inputs, then they'll need to be promoted here.
13909 for (unsigned i = 0; i < 2; ++i) {
13910 if (!isa<ConstantSDNode>(Ops[C+i]))
13911 continue;
13912 if (Ops[C+i].getValueType() == N->getValueType(0))
13913 continue;
13914
13915 if (N->getOpcode() == ISD::SIGN_EXTEND)
13916 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13917 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13918 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13919 else
13920 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13921 }
13922
13923 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13924 // truncate them again to the original value type.
13925 if (PromOp.getOpcode() == ISD::SELECT ||
13926 PromOp.getOpcode() == ISD::SELECT_CC) {
13927 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13928 if (SI0 != SelectTruncOp[0].end())
13929 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13930 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13931 if (SI1 != SelectTruncOp[1].end())
13932 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13933 }
13934
13935 DAG.ReplaceAllUsesOfValueWith(PromOp,
13936 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13937 }
13938
13939 // Now we're left with the initial extension itself.
13940 if (!ReallyNeedsExt)
13941 return N->getOperand(0);
13942
13943 // To zero extend, just mask off everything except for the first bit (in the
13944 // i1 case).
13945 if (N->getOpcode() == ISD::ZERO_EXTEND)
13946 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13947 DAG.getConstant(APInt::getLowBitsSet(
13948 N->getValueSizeInBits(0), PromBits),
13949 dl, N->getValueType(0)));
13950
13951 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13952 "Invalid extension type");
13953 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13954 SDValue ShiftCst =
13955 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13956 return DAG.getNode(
13957 ISD::SRA, dl, N->getValueType(0),
13958 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13959 ShiftCst);
13960}
13961
13962SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13963 DAGCombinerInfo &DCI) const {
13964 assert(N->getOpcode() == ISD::SETCC &&
13965 "Should be called with a SETCC node");
13966
13967 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13968 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13969 SDValue LHS = N->getOperand(0);
13970 SDValue RHS = N->getOperand(1);
13971
13972 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13973 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13974 LHS.hasOneUse())
13975 std::swap(LHS, RHS);
13976
13977 // x == 0-y --> x+y == 0
13978 // x != 0-y --> x+y != 0
13979 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13980 RHS.hasOneUse()) {
13981 SDLoc DL(N);
13982 SelectionDAG &DAG = DCI.DAG;
13983 EVT VT = N->getValueType(0);
13984 EVT OpVT = LHS.getValueType();
13985 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13986 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13987 }
13988 }
13989
13990 return DAGCombineTruncBoolExt(N, DCI);
13991}
13992
13993// Is this an extending load from an f32 to an f64?
13994static bool isFPExtLoad(SDValue Op) {
13995 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13996 return LD->getExtensionType() == ISD::EXTLOAD &&
13997 Op.getValueType() == MVT::f64;
13998 return false;
13999}
14000
14001/// Reduces the number of fp-to-int conversion when building a vector.
14002///
14003/// If this vector is built out of floating to integer conversions,
14004/// transform it to a vector built out of floating point values followed by a
14005/// single floating to integer conversion of the vector.
14006/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14007/// becomes (fptosi (build_vector ($A, $B, ...)))
14008SDValue PPCTargetLowering::
14009combineElementTruncationToVectorTruncation(SDNode *N,
14010 DAGCombinerInfo &DCI) const {
14011 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14012 "Should be called with a BUILD_VECTOR node");
14013
14014 SelectionDAG &DAG = DCI.DAG;
14015 SDLoc dl(N);
14016
14017 SDValue FirstInput = N->getOperand(0);
14018 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14019 "The input operand must be an fp-to-int conversion.");
14020
14021 // This combine happens after legalization so the fp_to_[su]i nodes are
14022 // already converted to PPCISD nodes.
14023 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14024 if (FirstConversion == PPCISD::FCTIDZ ||
14025 FirstConversion == PPCISD::FCTIDUZ ||
14026 FirstConversion == PPCISD::FCTIWZ ||
14027 FirstConversion == PPCISD::FCTIWUZ) {
14028 bool IsSplat = true;
14029 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14030 FirstConversion == PPCISD::FCTIWUZ;
14031 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14032 SmallVector<SDValue, 4> Ops;
14033 EVT TargetVT = N->getValueType(0);
14034 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14035 SDValue NextOp = N->getOperand(i);
14036 if (NextOp.getOpcode() != PPCISD::MFVSR)
14037 return SDValue();
14038 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14039 if (NextConversion != FirstConversion)
14040 return SDValue();
14041 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14042 // This is not valid if the input was originally double precision. It is
14043 // also not profitable to do unless this is an extending load in which
14044 // case doing this combine will allow us to combine consecutive loads.
14045 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14046 return SDValue();
14047 if (N->getOperand(i) != FirstInput)
14048 IsSplat = false;
14049 }
14050
14051 // If this is a splat, we leave it as-is since there will be only a single
14052 // fp-to-int conversion followed by a splat of the integer. This is better
14053 // for 32-bit and smaller ints and neutral for 64-bit ints.
14054 if (IsSplat)
14055 return SDValue();
14056
14057 // Now that we know we have the right type of node, get its operands
14058 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14059 SDValue In = N->getOperand(i).getOperand(0);
14060 if (Is32Bit) {
14061 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14062 // here, we know that all inputs are extending loads so this is safe).
14063 if (In.isUndef())
14064 Ops.push_back(DAG.getUNDEF(SrcVT));
14065 else {
14066 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
14067 MVT::f32, In.getOperand(0),
14068 DAG.getIntPtrConstant(1, dl));
14069 Ops.push_back(Trunc);
14070 }
14071 } else
14072 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14073 }
14074
14075 unsigned Opcode;
14076 if (FirstConversion == PPCISD::FCTIDZ ||
14077 FirstConversion == PPCISD::FCTIWZ)
14078 Opcode = ISD::FP_TO_SINT;
14079 else
14080 Opcode = ISD::FP_TO_UINT;
14081
14082 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14083 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14084 return DAG.getNode(Opcode, dl, TargetVT, BV);
14085 }
14086 return SDValue();
14087}
14088
14089/// Reduce the number of loads when building a vector.
14090///
14091/// Building a vector out of multiple loads can be converted to a load
14092/// of the vector type if the loads are consecutive. If the loads are
14093/// consecutive but in descending order, a shuffle is added at the end
14094/// to reorder the vector.
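/// For example, (build_vector (load p), (load p+4), (load p+8), (load p+12))
/// can become a single v4i32 load from p, while the same loads in descending
/// order become that load followed by an element-reversing vector_shuffle.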
14095static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14096 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14097 "Should be called with a BUILD_VECTOR node");
14098
14099 SDLoc dl(N);
14100
14101 // Return early for non-byte-sized types, as they can't be consecutive.
14102 if (!N->getValueType(0).getVectorElementType().isByteSized())
14103 return SDValue();
14104
14105 bool InputsAreConsecutiveLoads = true;
14106 bool InputsAreReverseConsecutive = true;
14107 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14108 SDValue FirstInput = N->getOperand(0);
14109 bool IsRoundOfExtLoad = false;
14110
14111 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14112 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14113 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
14114 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
14115 }
14116 // Not a build vector of (possibly fp_rounded) loads.
14117 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14118 N->getNumOperands() == 1)
14119 return SDValue();
14120
14121 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14122 // If any inputs are fp_round(extload), they all must be.
14123 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14124 return SDValue();
14125
14126 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14127 N->getOperand(i);
14128 if (NextInput.getOpcode() != ISD::LOAD)
14129 return SDValue();
14130
14131 SDValue PreviousInput =
14132 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14133 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
14134 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
14135
14136 // If any inputs are fp_round(extload), they all must be.
14137 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14138 return SDValue();
14139
14140 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
14141 InputsAreConsecutiveLoads = false;
14142 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
14143 InputsAreReverseConsecutive = false;
14144
14145 // Exit early if the loads are neither consecutive nor reverse consecutive.
14146 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14147 return SDValue();
14148 }
14149
14150 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14151 "The loads cannot be both consecutive and reverse consecutive.");
14152
14153 SDValue FirstLoadOp =
14154 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
14155 SDValue LastLoadOp =
14156 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
14157 N->getOperand(N->getNumOperands()-1);
14158
14159 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
14160 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
14161 if (InputsAreConsecutiveLoads) {
14162 assert(LD1 && "Input needs to be a LoadSDNode.");
14163 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
14164 LD1->getBasePtr(), LD1->getPointerInfo(),
14165 LD1->getAlign());
14166 }
14167 if (InputsAreReverseConsecutive) {
14168 assert(LDL && "Input needs to be a LoadSDNode.");
14169 SDValue Load =
14170 DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
14171                    LDL->getPointerInfo(), LDL->getAlign());
14172    SmallVector<int, 16> Ops;
14173 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14174 Ops.push_back(i);
14175
14176 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
14177 DAG.getUNDEF(N->getValueType(0)), Ops);
14178 }
14179 return SDValue();
14180}
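// Illustrative examples for combineBVOfConsecutiveLoads (hypothetical
// addresses): a v4f32 build_vector fed by four f32 loads from p, p+4, p+8,
// p+12 becomes a single v4f32 load from p; if the same loads appear in
// descending address order, the vector is instead loaded from the last
// operand's address and reordered with the shuffle mask <3,2,1,0>.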
14181
14182// This function adds the vector_shuffle needed to get the elements of the
14183// vector extract into the correct positions
14184// as specified by the CorrectElems encoding.
14185static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14186 SDValue Input, uint64_t Elems,
14187 uint64_t CorrectElems) {
14188 SDLoc dl(N);
14189
14190 unsigned NumElems = Input.getValueType().getVectorNumElements();
14191 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14192
14193 // Knowing the element indices being extracted from the original
14194 // vector and the order in which they're being inserted, just put
14195 // them at element indices required for the instruction.
14196 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14197 if (DAG.getDataLayout().isLittleEndian())
14198 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14199 else
14200 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14201 CorrectElems = CorrectElems >> 8;
14202 Elems = Elems >> 8;
14203 }
14204
14205 SDValue Shuffle =
14206 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14207 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14208
14209 EVT VT = N->getValueType(0);
14210 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14211
14212 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14213                               Input.getValueType().getVectorElementType(),
14214                               N->getNumOperands());
14215 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14216 DAG.getValueType(ExtVT));
14217}
14218
14219// Look for build vector patterns where input operands come from sign
14220// extended vector_extract elements of specific indices. If the correct indices
14221// aren't used, add a vector shuffle to fix up the indices and create
14222// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14223// during instruction selection.
14224static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14225 // This array encodes the indices that the vector sign extend instructions
14226 // extract from when extending from one type to another for both BE and LE.
14227  // The right nibble of each byte corresponds to the LE indices,
14228  // and the left nibble of each byte corresponds to the BE indices.
14229 // For example: 0x3074B8FC byte->word
14230 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14231 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14232 // For example: 0x000070F8 byte->double word
14233 // For LE: the allowed indices are: 0x0,0x8
14234 // For BE: the allowed indices are: 0x7,0xF
14235 uint64_t TargetElems[] = {
14236 0x3074B8FC, // b->w
14237 0x000070F8, // b->d
14238 0x10325476, // h->w
14239 0x00003074, // h->d
14240 0x00001032, // w->d
14241 };
14242
14243 uint64_t Elems = 0;
14244 int Index;
14245 SDValue Input;
14246
14247 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14248 if (!Op)
14249 return false;
14250 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14251 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14252 return false;
14253
14254 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14255 // of the right width.
14256 SDValue Extract = Op.getOperand(0);
14257 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14258 Extract = Extract.getOperand(0);
14259 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14260 return false;
14261
14262    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14263    if (!ExtOp)
14264 return false;
14265
14266 Index = ExtOp->getZExtValue();
14267 if (Input && Input != Extract.getOperand(0))
14268 return false;
14269
14270 if (!Input)
14271 Input = Extract.getOperand(0);
14272
14273 Elems = Elems << 8;
14274 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14275 Elems |= Index;
14276
14277 return true;
14278 };
14279
14280  // If the build vector operands aren't sign-extended vector extracts
14281 // of the same input vector, then return.
14282 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14283 if (!isSExtOfVecExtract(N->getOperand(i))) {
14284 return SDValue();
14285 }
14286 }
14287
14288  // If the vector extract indices are not correct, add the appropriate
14289 // vector_shuffle.
14290 int TgtElemArrayIdx;
14291 int InputSize = Input.getValueType().getScalarSizeInBits();
14292 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14293 if (InputSize + OutputSize == 40)
14294 TgtElemArrayIdx = 0;
14295 else if (InputSize + OutputSize == 72)
14296 TgtElemArrayIdx = 1;
14297 else if (InputSize + OutputSize == 48)
14298 TgtElemArrayIdx = 2;
14299 else if (InputSize + OutputSize == 80)
14300 TgtElemArrayIdx = 3;
14301 else if (InputSize + OutputSize == 96)
14302 TgtElemArrayIdx = 4;
14303 else
14304 return SDValue();
14305
14306 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14307 CorrectElems = DAG.getDataLayout().isLittleEndian()
14308 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14309 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14310 if (Elems != CorrectElems) {
14311 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14312 }
14313
14314 // Regular lowering will catch cases where a shuffle is not needed.
14315 return SDValue();
14316}
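// A worked example of the encoding check above (hypothetical input): on
// little endian, a v2i64 build_vector of sign-extended extracts from byte
// elements 0 and 8 of one v16i8 vector accumulates Elems = 0x08, which
// equals the b->d entry 0x000070F8 masked to its LE nibbles (0x00000008),
// so no fixup shuffle is needed and SDValue() is returned.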
14317
14318// Look for the pattern of a load from a narrow width to i128, feeding
14319// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14320// (LXVRZX). This node represents a zero extending load that will be matched
14321// to the Load VSX Vector Rightmost instructions.
14322static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14323  SDLoc DL(N);
14324
14325 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14326 if (N->getValueType(0) != MVT::v1i128)
14327 return SDValue();
14328
14329 SDValue Operand = N->getOperand(0);
14330 // Proceed with the transformation if the operand to the BUILD_VECTOR
14331 // is a load instruction.
14332 if (Operand.getOpcode() != ISD::LOAD)
14333 return SDValue();
14334
14335 auto *LD = cast<LoadSDNode>(Operand);
14336 EVT MemoryType = LD->getMemoryVT();
14337
14338  // This transformation is only valid if we are loading either a byte,
14339 // halfword, word, or doubleword.
14340 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14341 MemoryType == MVT::i32 || MemoryType == MVT::i64;
14342
14343 // Ensure that the load from the narrow width is being zero extended to i128.
14344 if (!ValidLDType ||
14345 (LD->getExtensionType() != ISD::ZEXTLOAD &&
14346 LD->getExtensionType() != ISD::EXTLOAD))
14347 return SDValue();
14348
14349 SDValue LoadOps[] = {
14350 LD->getChain(), LD->getBasePtr(),
14351 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14352
14353  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14354                                 DAG.getVTList(MVT::v1i128, MVT::Other),
14355                                 LoadOps, MemoryType, LD->getMemOperand());
14356}
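// An illustrative example for combineBVZEXTLOAD (hypothetical input): a
// v1i128 build_vector whose only operand is a zero-extending i64 load is
// replaced by a PPCISD::LXVRZX memory node whose third operand is the
// element width in bits (64 here), which instruction selection then matches
// to a Load VSX Vector Rightmost instruction.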
14357
14358SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14359 DAGCombinerInfo &DCI) const {
14360 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14361 "Should be called with a BUILD_VECTOR node");
14362
14363 SelectionDAG &DAG = DCI.DAG;
14364 SDLoc dl(N);
14365
14366 if (!Subtarget.hasVSX())
14367 return SDValue();
14368
14369 // The target independent DAG combiner will leave a build_vector of
14370 // float-to-int conversions intact. We can generate MUCH better code for
14371 // a float-to-int conversion of a vector of floats.
14372 SDValue FirstInput = N->getOperand(0);
14373 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14374 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14375 if (Reduced)
14376 return Reduced;
14377 }
14378
14379 // If we're building a vector out of consecutive loads, just load that
14380 // vector type.
14381 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14382 if (Reduced)
14383 return Reduced;
14384
14385 // If we're building a vector out of extended elements from another vector
14386 // we have P9 vector integer extend instructions. The code assumes legal
14387 // input types (i.e. it can't handle things like v4i16) so do not run before
14388 // legalization.
14389 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14390 Reduced = combineBVOfVecSExt(N, DAG);
14391 if (Reduced)
14392 return Reduced;
14393 }
14394
14395 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14396 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14397 // is a load from <valid narrow width> to i128.
14398 if (Subtarget.isISA3_1()) {
14399 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14400 if (BVOfZLoad)
14401 return BVOfZLoad;
14402 }
14403
14404 if (N->getValueType(0) != MVT::v2f64)
14405 return SDValue();
14406
14407 // Looking for:
14408 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14409 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14410 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14411 return SDValue();
14412 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14413 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14414 return SDValue();
14415 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14416 return SDValue();
14417
14418 SDValue Ext1 = FirstInput.getOperand(0);
14419 SDValue Ext2 = N->getOperand(1).getOperand(0);
14420 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14421     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14422    return SDValue();
14423
14424  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14425  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14426  if (!Ext1Op || !Ext2Op)
14427 return SDValue();
14428 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14429 Ext1.getOperand(0) != Ext2.getOperand(0))
14430 return SDValue();
14431
14432 int FirstElem = Ext1Op->getZExtValue();
14433 int SecondElem = Ext2Op->getZExtValue();
14434 int SubvecIdx;
14435 if (FirstElem == 0 && SecondElem == 1)
14436 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14437 else if (FirstElem == 2 && SecondElem == 3)
14438 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14439 else
14440 return SDValue();
14441
14442 SDValue SrcVec = Ext1.getOperand(0);
14443 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14444    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14445  return DAG.getNode(NodeType, dl, MVT::v2f64,
14446 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14447}
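// An illustrative example of the final v2f64 case above (hypothetical
// input, little endian): the node
//   (build_vector (sint_to_fp (extractelt %v:v4i32, 0)),
//                 (sint_to_fp (extractelt %v, 1)))
// becomes (PPCISD::SINT_VEC_TO_FP %v, 1), converting the relevant half of
// the source vector directly in the vector domain.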
14448
14449SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14450 DAGCombinerInfo &DCI) const {
14451 assert((N->getOpcode() == ISD::SINT_TO_FP ||
14452 N->getOpcode() == ISD::UINT_TO_FP) &&
14453 "Need an int -> FP conversion node here");
14454
14455 if (useSoftFloat() || !Subtarget.has64BitSupport())
14456 return SDValue();
14457
14458 SelectionDAG &DAG = DCI.DAG;
14459 SDLoc dl(N);
14460 SDValue Op(N, 0);
14461
14462 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14463 // from the hardware.
14464 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14465 return SDValue();
14466 if (!Op.getOperand(0).getValueType().isSimple())
14467 return SDValue();
14468 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14469 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14470 return SDValue();
14471
14472 SDValue FirstOperand(Op.getOperand(0));
14473 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14474 (FirstOperand.getValueType() == MVT::i8 ||
14475 FirstOperand.getValueType() == MVT::i16);
14476 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14477 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14478 bool DstDouble = Op.getValueType() == MVT::f64;
14479 unsigned ConvOp = Signed ?
14480 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14481 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14482 SDValue WidthConst =
14483 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14484 dl, false);
14485 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14486 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14487    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14488                                         DAG.getVTList(MVT::f64, MVT::Other),
14489                                         Ops, MVT::i8, LDN->getMemOperand());
14490
14491 // For signed conversion, we need to sign-extend the value in the VSR
14492 if (Signed) {
14493 SDValue ExtOps[] = { Ld, WidthConst };
14494 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14495 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14496 } else
14497 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14498 }
14499
14500
14501 // For i32 intermediate values, unfortunately, the conversion functions
14502  // leave the upper 32 bits of the value undefined. Within the set of
14503 // scalar instructions, we have no method for zero- or sign-extending the
14504 // value. Thus, we cannot handle i32 intermediate values here.
14505 if (Op.getOperand(0).getValueType() == MVT::i32)
14506 return SDValue();
14507
14508 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14509 "UINT_TO_FP is supported only with FPCVT");
14510
14511 // If we have FCFIDS, then use it when converting to single-precision.
14512 // Otherwise, convert to double-precision and then round.
14513 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14514 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14515                                                            : PPCISD::FCFIDS)
14516                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14517 : PPCISD::FCFID);
14518 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14519 ? MVT::f32
14520 : MVT::f64;
14521
14522  // If we're converting from a float to an int and back to a float again,
14523 // then we don't need the store/load pair at all.
14524 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14525 Subtarget.hasFPCVT()) ||
14526 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14527 SDValue Src = Op.getOperand(0).getOperand(0);
14528 if (Src.getValueType() == MVT::f32) {
14529 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14530 DCI.AddToWorklist(Src.getNode());
14531 } else if (Src.getValueType() != MVT::f64) {
14532 // Make sure that we don't pick up a ppc_fp128 source value.
14533 return SDValue();
14534 }
14535
14536 unsigned FCTOp =
14537 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14538                                                        PPCISD::FCTIDUZ;
14539
14540 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14541 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14542
14543 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14544 FP = DAG.getNode(ISD::FP_ROUND, dl,
14545 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14546 DCI.AddToWorklist(FP.getNode());
14547 }
14548
14549 return FP;
14550 }
14551
14552 return SDValue();
14553}
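// An illustrative example of the round-trip case above (hypothetical
// input): (sint_to_fp (fp_to_sint f64 %x)) is rewritten as a PPCISD::FCFID
// of a PPCISD::FCTIDZ of %x, so the value stays in a floating-point
// register instead of travelling through the store/load pair that default
// lowering would create.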
14554
14555// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14556// builtins) into loads with swaps.
14557SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14558                                              DAGCombinerInfo &DCI) const {
14559 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
14560 // load combines.
14561 if (DCI.isBeforeLegalizeOps())
14562 return SDValue();
14563
14564 SelectionDAG &DAG = DCI.DAG;
14565 SDLoc dl(N);
14566 SDValue Chain;
14567 SDValue Base;
14568 MachineMemOperand *MMO;
14569
14570 switch (N->getOpcode()) {
14571 default:
14572 llvm_unreachable("Unexpected opcode for little endian VSX load");
14573 case ISD::LOAD: {
14574    LoadSDNode *LD = cast<LoadSDNode>(N);
14575    Chain = LD->getChain();
14576 Base = LD->getBasePtr();
14577 MMO = LD->getMemOperand();
14578 // If the MMO suggests this isn't a load of a full vector, leave
14579 // things alone. For a built-in, we have to make the change for
14580 // correctness, so if there is a size problem that will be a bug.
14581 if (MMO->getSize() < 16)
14582 return SDValue();
14583 break;
14584 }
14585  case ISD::INTRINSIC_W_CHAIN: {
14586    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14587    Chain = Intrin->getChain();
14588 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14589 // us what we want. Get operand 2 instead.
14590 Base = Intrin->getOperand(2);
14591 MMO = Intrin->getMemOperand();
14592 break;
14593 }
14594 }
14595
14596 MVT VecTy = N->getValueType(0).getSimpleVT();
14597
14598 SDValue LoadOps[] = { Chain, Base };
14599  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14600                                         DAG.getVTList(MVT::v2f64, MVT::Other),
14601                                         LoadOps, MVT::v2f64, MMO);
14602
14603 DCI.AddToWorklist(Load.getNode());
14604 Chain = Load.getValue(1);
14605 SDValue Swap = DAG.getNode(
14606 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14607 DCI.AddToWorklist(Swap.getNode());
14608
14609 // Add a bitcast if the resulting load type doesn't match v2f64.
14610 if (VecTy != MVT::v2f64) {
14611 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14612 DCI.AddToWorklist(N.getNode());
14613 // Package {bitcast value, swap's chain} to match Load's shape.
14614 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14615 N, Swap.getValue(1));
14616 }
14617
14618 return Swap;
14619}
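// An illustrative example for expandVSXLoadForLE (hypothetical input): on a
// little endian subtarget that needsSwapsForVSXMemOps(), a v4i32 load is
// rewritten as a v2f64 PPCISD::LXVD2X followed by PPCISD::XXSWAPD and a
// bitcast back to v4i32, with MERGE_VALUES packaging the swapped value
// together with the load chain.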
14620
14621// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14622// builtins) into stores with swaps.
14623SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14624                                               DAGCombinerInfo &DCI) const {
14625 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
14626 // store combines.
14627 if (DCI.isBeforeLegalizeOps())
14628 return SDValue();
14629
14630 SelectionDAG &DAG = DCI.DAG;
14631 SDLoc dl(N);
14632 SDValue Chain;
14633 SDValue Base;
14634 unsigned SrcOpnd;
14635 MachineMemOperand *MMO;
14636
14637 switch (N->getOpcode()) {
14638 default:
14639 llvm_unreachable("Unexpected opcode for little endian VSX store");
14640 case ISD::STORE: {
14641    StoreSDNode *ST = cast<StoreSDNode>(N);
14642    Chain = ST->getChain();
14643 Base = ST->getBasePtr();
14644 MMO = ST->getMemOperand();
14645 SrcOpnd = 1;
14646 // If the MMO suggests this isn't a store of a full vector, leave
14647 // things alone. For a built-in, we have to make the change for
14648    // correctness, so if there is a size problem, that is a bug.
14649 if (MMO->getSize() < 16)
14650 return SDValue();
14651 break;
14652 }
14653 case ISD::INTRINSIC_VOID: {
14654    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14655    Chain = Intrin->getChain();
14656 // Intrin->getBasePtr() oddly does not get what we want.
14657 Base = Intrin->getOperand(3);
14658 MMO = Intrin->getMemOperand();
14659 SrcOpnd = 2;
14660 break;
14661 }
14662 }
14663
14664 SDValue Src = N->getOperand(SrcOpnd);
14665 MVT VecTy = Src.getValueType().getSimpleVT();
14666
14667  // All stores are done as v2f64, with a bitcast added when needed.
14668 if (VecTy != MVT::v2f64) {
14669 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14670 DCI.AddToWorklist(Src.getNode());
14671 }
14672
14673 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14674 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14675 DCI.AddToWorklist(Swap.getNode());
14676 Chain = Swap.getValue(1);
14677 SDValue StoreOps[] = { Chain, Swap, Base };
14678  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14679                                          DAG.getVTList(MVT::Other),
14680 StoreOps, VecTy, MMO);
14681 DCI.AddToWorklist(Store.getNode());
14682 return Store;
14683}
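// An illustrative example for expandVSXStoreForLE (hypothetical input): the
// mirror of the load case above; storing a v4i32 value becomes a bitcast to
// v2f64, a PPCISD::XXSWAPD to restore the expected element order, and then
// a PPCISD::STXVD2X memory node.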
14684
14685// Handle DAG combine for STORE (FP_TO_INT F).
14686SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14687 DAGCombinerInfo &DCI) const {
14688
14689 SelectionDAG &DAG = DCI.DAG;
14690 SDLoc dl(N);
14691 unsigned Opcode = N->getOperand(1).getOpcode();
14692
14693 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14694 && "Not a FP_TO_INT Instruction!");
14695
14696 SDValue Val = N->getOperand(1).getOperand(0);
14697 EVT Op1VT = N->getOperand(1).getValueType();
14698 EVT ResVT = Val.getValueType();
14699
14700 if (!isTypeLegal(ResVT))
14701 return SDValue();
14702
14703 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14704 bool ValidTypeForStoreFltAsInt =
14705 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14706 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14707
14708 if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
14709 return SDValue();
14710
14711 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14712 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14713 return SDValue();
14714
14715 // Extend f32 values to f64
14716 if (ResVT.getScalarSizeInBits() == 32) {
14717 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14718 DCI.AddToWorklist(Val.getNode());
14719 }
14720
14721 // Set signed or unsigned conversion opcode.
14722 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14723                          PPCISD::FP_TO_SINT_IN_VSR :
14724                          PPCISD::FP_TO_UINT_IN_VSR;
14725
14726 Val = DAG.getNode(ConvOpcode,
14727 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14728 DCI.AddToWorklist(Val.getNode());
14729
14730 // Set number of bytes being converted.
14731 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14732 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14733 DAG.getIntPtrConstant(ByteSize, dl, false),
14734 DAG.getValueType(Op1VT) };
14735
14736  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14737                                DAG.getVTList(MVT::Other), Ops,
14738 cast<StoreSDNode>(N)->getMemoryVT(),
14739 cast<StoreSDNode>(N)->getMemOperand());
14740
14741 DCI.AddToWorklist(Val.getNode());
14742 return Val;
14743}
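// An illustrative example for combineStoreFPToInt (hypothetical input):
// (store (fp_to_sint f64 %x) to i32) becomes a PPCISD::ST_VSR_SCAL_INT
// memory node whose value operand is (PPCISD::FP_TO_SINT_IN_VSR %x) and
// whose width operand is 4 bytes, so the converted value is stored straight
// from the VSR without a GPR round trip.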
14744
14745static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14746 // Check that the source of the element keeps flipping
14747  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
14748 bool PrevElemFromFirstVec = Mask[0] < NumElts;
14749 for (int i = 1, e = Mask.size(); i < e; i++) {
14750 if (PrevElemFromFirstVec && Mask[i] < NumElts)
14751 return false;
14752 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14753 return false;
14754 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14755 }
14756 return true;
14757}
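// Example masks for the predicate above (hypothetical values, NumElts = 4):
// <0,4,1,5> and <4,0,5,1> keep alternating sources and return true, while
// <0,1,4,5> returns false at i == 1 because two consecutive elements come
// from the same source vector.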
14758
14759static bool isSplatBV(SDValue Op) {
14760 if (Op.getOpcode() != ISD::BUILD_VECTOR)
14761 return false;
14762 SDValue FirstOp;
14763
14764 // Find first non-undef input.
14765 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14766 FirstOp = Op.getOperand(i);
14767 if (!FirstOp.isUndef())
14768 break;
14769 }
14770
14771 // All inputs are undef or the same as the first non-undef input.
14772 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14773 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14774 return false;
14775 return true;
14776}
14777
14778static SDValue isScalarToVec(SDValue Op) {
14779  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14780 return Op;
14781 if (Op.getOpcode() != ISD::BITCAST)
14782 return SDValue();
14783 Op = Op.getOperand(0);
14784 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14785 return Op;
14786 return SDValue();
14787}
14788
14789// Fix up the shuffle mask to account for the fact that the result of
14790// scalar_to_vector is not in lane zero. This just takes all values in
14791// the ranges specified by the min/max indices and adds the number of
14792// elements required to ensure each element comes from the respective
14793// position in the valid lane.
14794// On little endian, that's just the corresponding element in the other
14795// half of the vector. On big endian, it is in the same half but right
14796// justified rather than left justified in that half.
14797static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14798                                            int LHSMaxIdx, int RHSMinIdx,
14799 int RHSMaxIdx, int HalfVec,
14800 unsigned ValidLaneWidth,
14801 const PPCSubtarget &Subtarget) {
14802 for (int i = 0, e = ShuffV.size(); i < e; i++) {
14803 int Idx = ShuffV[i];
14804 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14805 ShuffV[i] +=
14806 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14807 }
14808}
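// A worked example for the fixup above (hypothetical values): for a v4i32
// shuffle with HalfVec = 2 and ValidLaneWidth = 1, an LHS mask entry 0
// (LHSMaxIdx = 1) becomes 2 on little endian, pointing at the lane where
// the permuted scalar_to_vector actually left the value; on big endian it
// becomes 2 - 1 = 1, the right-justified element of the same half.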
14809
14810// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14811// the original is:
14812// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14813// In such a case, just change the shuffle mask to extract the element
14814// from the permuted index.
14815static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
14816                               const PPCSubtarget &Subtarget) {
14817 SDLoc dl(OrigSToV);
14818 EVT VT = OrigSToV.getValueType();
14819 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14820 "Expecting a SCALAR_TO_VECTOR here");
14821 SDValue Input = OrigSToV.getOperand(0);
14822
14823 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14824 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14825 SDValue OrigVector = Input.getOperand(0);
14826
14827 // Can't handle non-const element indices or different vector types
14828 // for the input to the extract and the output of the scalar_to_vector.
14829 if (Idx && VT == OrigVector.getValueType()) {
14830 unsigned NumElts = VT.getVectorNumElements();
14831 assert(
14832 NumElts > 1 &&
14833 "Cannot produce a permuted scalar_to_vector for one element vector");
14834 SmallVector<int, 16> NewMask(NumElts, -1);
14835 unsigned ResultInElt = NumElts / 2;
14836 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
14837 NewMask[ResultInElt] = Idx->getZExtValue();
14838 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14839 }
14840 }
14841 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14842 OrigSToV.getOperand(0));
14843}
14844
14845// On little endian subtargets, combine shuffles such as:
14846// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14847// into:
14848// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14849// because the latter can be matched to a single instruction merge.
14850// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14851// to put the value into element zero. Adjust the shuffle mask so that the
14852// vector can remain in permuted form (to prevent a swap prior to a shuffle).
14853// On big endian targets, this is still useful for SCALAR_TO_VECTOR
14854// nodes with elements smaller than doubleword because all the ways
14855// of getting scalar data into a vector register put the value in the
14856// rightmost element of the left half of the vector.
14857SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14858 SelectionDAG &DAG) const {
14859 SDValue LHS = SVN->getOperand(0);
14860 SDValue RHS = SVN->getOperand(1);
14861 auto Mask = SVN->getMask();
14862 int NumElts = LHS.getValueType().getVectorNumElements();
14863 SDValue Res(SVN, 0);
14864 SDLoc dl(SVN);
14865 bool IsLittleEndian = Subtarget.isLittleEndian();
14866
14867 // On big endian targets this is only useful for subtargets with direct moves.
14868 // On little endian targets it would be useful for all subtargets with VSX.
14869 // However adding special handling for LE subtargets without direct moves
14870 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
14871 // which includes direct moves.
14872 if (!Subtarget.hasDirectMove())
14873 return Res;
14874
14875 // If this is not a shuffle of a shuffle and the first element comes from
14876 // the second vector, canonicalize to the commuted form. This will make it
14877 // more likely to match one of the single instruction patterns.
14878 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14879 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14880 std::swap(LHS, RHS);
14881 Res = DAG.getCommutedVectorShuffle(*SVN);
14882 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14883 }
14884
14885 // Adjust the shuffle mask if either input vector comes from a
14886 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14887 // form (to prevent the need for a swap).
14888 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14889 SDValue SToVLHS = isScalarToVec(LHS);
14890 SDValue SToVRHS = isScalarToVec(RHS);
14891 if (SToVLHS || SToVRHS) {
14892 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
14893 // same type and have differing element sizes, then do not perform
14894 // the following transformation. The current transformation for
14895 // SCALAR_TO_VECTOR assumes that both input vectors have the same
14896 // element size. This will be updated in the future to account for
14897 // differing sizes of the LHS and RHS.
14898 if (SToVLHS && SToVRHS &&
14899 (SToVLHS.getValueType().getScalarSizeInBits() !=
14900 SToVRHS.getValueType().getScalarSizeInBits()))
14901 return Res;
14902
14903 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14904 : SToVRHS.getValueType().getVectorNumElements();
14905 int NumEltsOut = ShuffV.size();
14906 // The width of the "valid lane" (i.e. the lane that contains the value that
14907 // is vectorized) needs to be expressed in terms of the number of elements
14908 // of the shuffle. It is thereby the ratio of the values before and after
14909 // any bitcast.
14910 unsigned ValidLaneWidth =
14911 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
14912 LHS.getValueType().getScalarSizeInBits()
14913 : SToVRHS.getValueType().getScalarSizeInBits() /
14914 RHS.getValueType().getScalarSizeInBits();
14915
14916 // Initially assume that neither input is permuted. These will be adjusted
14917 // accordingly if either input is.
14918 int LHSMaxIdx = -1;
14919 int RHSMinIdx = -1;
14920 int RHSMaxIdx = -1;
14921 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14922
14923 // Get the permuted scalar to vector nodes for the source(s) that come from
14924 // ISD::SCALAR_TO_VECTOR.
14925 // On big endian systems, this only makes sense for element sizes smaller
14926 // than 64 bits since for 64-bit elements, all instructions already put
14927    // the value into element zero. Since the scalar sizes of the LHS and RHS
14928    // may differ after isScalarToVec, this is checked using their own sizes.
14929 if (SToVLHS) {
14930 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
14931 return Res;
14932 // Set up the values for the shuffle vector fixup.
14933 LHSMaxIdx = NumEltsOut / NumEltsIn;
14934 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
14935 if (SToVLHS.getValueType() != LHS.getValueType())
14936 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14937 LHS = SToVLHS;
14938 }
14939 if (SToVRHS) {
14940 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
14941 return Res;
14942 RHSMinIdx = NumEltsOut;
14943 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14944 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
14945 if (SToVRHS.getValueType() != RHS.getValueType())
14946 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14947 RHS = SToVRHS;
14948 }
14949
14950 // Fix up the shuffle mask to reflect where the desired element actually is.
14951 // The minimum and maximum indices that correspond to element zero for both
14952 // the LHS and RHS are computed and will control which shuffle mask entries
14953 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14954 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
14955 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14956 HalfVec, ValidLaneWidth, Subtarget);
14957 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14958
14959 // We may have simplified away the shuffle. We won't be able to do anything
14960 // further with it here.
14961 if (!isa<ShuffleVectorSDNode>(Res))
14962 return Res;
14963 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14964 }
14965
14966 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
14967 // The common case after we commuted the shuffle is that the RHS is a splat
14968 // and we have elements coming in from the splat at indices that are not
14969 // conducive to using a merge.
14970 // Example:
14971 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14972 if (!isSplatBV(TheSplat))
14973 return Res;
14974
14975 // We are looking for a mask such that all even elements are from
14976 // one vector and all odd elements from the other.
14977 if (!isAlternatingShuffMask(Mask, NumElts))
14978 return Res;
14979
14980 // Adjust the mask so we are pulling in the same index from the splat
14981 // as the index from the interesting vector in consecutive elements.
14982 if (IsLittleEndian) {
14983 // Example (even elements from first vector):
14984 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14985 if (Mask[0] < NumElts)
14986 for (int i = 1, e = Mask.size(); i < e; i += 2) {
14987 if (ShuffV[i] < 0)
14988 continue;
14989 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14990 }
14991 // Example (odd elements from first vector):
14992 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14993 else
14994 for (int i = 0, e = Mask.size(); i < e; i += 2) {
14995 if (ShuffV[i] < 0)
14996 continue;
14997 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14998 }
14999 } else {
15000 // Example (even elements from first vector):
15001 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15002 if (Mask[0] < NumElts)
15003 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15004 if (ShuffV[i] < 0)
15005 continue;
15006 ShuffV[i] = ShuffV[i + 1] - NumElts;
15007 }
15008 // Example (odd elements from first vector):
15009 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15010 else
15011 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15012 if (ShuffV[i] < 0)
15013 continue;
15014 ShuffV[i] = ShuffV[i - 1] - NumElts;
15015 }
15016 }
15017
15018 // If the RHS has undefs, we need to remove them since we may have created
15019 // a shuffle that adds those instead of the splat value.
15020 SDValue SplatVal =
15021 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15022 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15023
15024 if (IsLittleEndian)
15025 RHS = TheSplat;
15026 else
15027 LHS = TheSplat;
15028 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15029}
15030
15031SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15032 LSBaseSDNode *LSBase,
15033 DAGCombinerInfo &DCI) const {
15034 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15035 "Not a reverse memop pattern!");
15036
15037 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15038 auto Mask = SVN->getMask();
15039 int i = 0;
15040 auto I = Mask.rbegin();
15041 auto E = Mask.rend();
15042
15043 for (; I != E; ++I) {
15044 if (*I != i)
15045 return false;
15046 i++;
15047 }
15048 return true;
15049 };
15050
15051 SelectionDAG &DAG = DCI.DAG;
15052 EVT VT = SVN->getValueType(0);
15053
15054 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15055 return SDValue();
15056
15057  // Before Power9, the PPCVSXSwapRemoval pass rewrites the element order;
15058  // see the comment in PPCVSXSwapRemoval.cpp. This combine conflicts with
15059  // that pass, so we only perform it on Power9 and later.
15060 if (!Subtarget.hasP9Vector())
15061 return SDValue();
15062
15063 if(!IsElementReverse(SVN))
15064 return SDValue();
15065
15066 if (LSBase->getOpcode() == ISD::LOAD) {
15067    // If result 0 of the load has any user other than the shufflevector
15068    // instruction, it is not profitable to replace the shufflevector with
15069    // a reverse load.
15070 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15071 UI != UE; ++UI)
15072 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15073 return SDValue();
15074
15075 SDLoc dl(LSBase);
15076 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15077 return DAG.getMemIntrinsicNode(
15078 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15079 LSBase->getMemoryVT(), LSBase->getMemOperand());
15080 }
15081
15082 if (LSBase->getOpcode() == ISD::STORE) {
15083 // If there are other uses of the shuffle, the swap cannot be avoided.
15084 // Forcing the use of an X-Form (since swapped stores only have
15085 // X-Forms) without removing the swap is unprofitable.
15086 if (!SVN->hasOneUse())
15087 return SDValue();
15088
15089 SDLoc dl(LSBase);
15090 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15091 LSBase->getBasePtr()};
15092 return DAG.getMemIntrinsicNode(
15093 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15094 LSBase->getMemoryVT(), LSBase->getMemOperand());
15095 }
15096
15097 llvm_unreachable("Expected a load or store node here");
15098}
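// An illustrative example for combineVReverseMemOP (hypothetical input): on
// a little endian Power9 subtarget, (v4i32 vector_shuffle<3,2,1,0> (load
// %p), undef) where the shuffle is the only user of the loaded value
// becomes a PPCISD::LOAD_VEC_BE node, and the matching store pattern
// becomes PPCISD::STORE_VEC_BE, so the element-reversing swap disappears.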
15099
15100SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15101                                             DAGCombinerInfo &DCI) const {
15102 SelectionDAG &DAG = DCI.DAG;
15103 SDLoc dl(N);
15104 switch (N->getOpcode()) {
15105 default: break;
15106 case ISD::ADD:
15107 return combineADD(N, DCI);
15108 case ISD::SHL:
15109 return combineSHL(N, DCI);
15110 case ISD::SRA:
15111 return combineSRA(N, DCI);
15112 case ISD::SRL:
15113 return combineSRL(N, DCI);
15114 case ISD::MUL:
15115 return combineMUL(N, DCI);
15116 case ISD::FMA:
15117 case PPCISD::FNMSUB:
15118 return combineFMALike(N, DCI);
15119 case PPCISD::SHL:
15120 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15121 return N->getOperand(0);
15122 break;
15123 case PPCISD::SRL:
15124 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15125 return N->getOperand(0);
15126 break;
15127 case PPCISD::SRA:
15128 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15129 if (C->isZero() || // 0 >>s V -> 0.
15130 C->isAllOnes()) // -1 >>s V -> -1.
15131 return N->getOperand(0);
15132 }
15133 break;
15134 case ISD::SIGN_EXTEND:
15135 case ISD::ZERO_EXTEND:
15136 case ISD::ANY_EXTEND:
15137 return DAGCombineExtBoolTrunc(N, DCI);
15138 case ISD::TRUNCATE:
15139 return combineTRUNCATE(N, DCI);
15140 case ISD::SETCC:
15141 if (SDValue CSCC = combineSetCC(N, DCI))
15142 return CSCC;
15143    LLVM_FALLTHROUGH;
15144  case ISD::SELECT_CC:
15145 return DAGCombineTruncBoolExt(N, DCI);
15146 case ISD::SINT_TO_FP:
15147 case ISD::UINT_TO_FP:
15148 return combineFPToIntToFP(N, DCI);
15149  case ISD::VECTOR_SHUFFLE:
15150    if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15151 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15152 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15153 }
15154 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15155 case ISD::STORE: {
15156
15157 EVT Op1VT = N->getOperand(1).getValueType();
15158 unsigned Opcode = N->getOperand(1).getOpcode();
15159
15160 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
15161 SDValue Val= combineStoreFPToInt(N, DCI);
15162 if (Val)
15163 return Val;
15164 }
15165
15166 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15167 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15168 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15169 if (Val)
15170 return Val;
15171 }
15172
15173 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15174 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15175 N->getOperand(1).getNode()->hasOneUse() &&
15176 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15177 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15178
15179      // STBRX can only handle simple types, and it makes no sense to store
15180      // fewer than two bytes in byte-reversed order.
15181 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15182 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15183 break;
15184
15185 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15186 // Do an any-extend to 32-bits if this is a half-word input.
15187 if (BSwapOp.getValueType() == MVT::i16)
15188 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15189
15190      // If the type of the BSWAP operand is wider than the stored memory
15191      // width, it needs to be shifted right before the STBRX.
15192 if (Op1VT.bitsGT(mVT)) {
15193 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15194 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15195 DAG.getConstant(Shift, dl, MVT::i32));
15196 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15197 if (Op1VT == MVT::i64)
15198 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15199 }
15200
15201 SDValue Ops[] = {
15202 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15203 };
15204 return
15205        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15206                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15207 cast<StoreSDNode>(N)->getMemOperand());
15208 }
15209
15210 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15211 // So it can increase the chance of CSE constant construction.
15212 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15213 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15214      // Need to sign-extend to 64 bits to handle negative values.
15215 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15216 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15217 MemVT.getSizeInBits());
15218 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15219
15220 // DAG.getTruncStore() can't be used here because it doesn't accept
15221 // the general (base + offset) addressing mode.
15222 // So we use UpdateNodeOperands and setTruncatingStore instead.
15223 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15224 N->getOperand(3));
15225 cast<StoreSDNode>(N)->setTruncatingStore(true);
15226 return SDValue(N, 0);
15227 }
15228
15229 // For little endian, VSX stores require generating xxswapd/lxvd2x.
15230 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15231 if (Op1VT.isSimple()) {
15232 MVT StoreVT = Op1VT.getSimpleVT();
15233 if (Subtarget.needsSwapsForVSXMemOps() &&
15234 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15235 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15236 return expandVSXStoreForLE(N, DCI);
15237 }
15238 break;
15239 }
15240 case ISD::LOAD: {
15241    LoadSDNode *LD = cast<LoadSDNode>(N);
15242    EVT VT = LD->getValueType(0);
15243
15244 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15245 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15246 if (VT.isSimple()) {
15247 MVT LoadVT = VT.getSimpleVT();
15248 if (Subtarget.needsSwapsForVSXMemOps() &&
15249 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15250 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15251 return expandVSXLoadForLE(N, DCI);
15252 }
15253
15254 // We sometimes end up with a 64-bit integer load, from which we extract
15255 // two single-precision floating-point numbers. This happens with
15256 // std::complex<float>, and other similar structures, because of the way we
15257 // canonicalize structure copies. However, if we lack direct moves,
15258 // then the final bitcasts from the extracted integer values to the
15259 // floating-point numbers turn into store/load pairs. Even with direct moves,
15260 // just loading the two floating-point numbers is likely better.
15261 auto ReplaceTwoFloatLoad = [&]() {
15262 if (VT != MVT::i64)
15263 return false;
15264
15265 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15266 LD->isVolatile())
15267 return false;
15268
15269 // We're looking for a sequence like this:
15270 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15271 // t16: i64 = srl t13, Constant:i32<32>
15272 // t17: i32 = truncate t16
15273 // t18: f32 = bitcast t17
15274 // t19: i32 = truncate t13
15275 // t20: f32 = bitcast t19
15276
15277 if (!LD->hasNUsesOfValue(2, 0))
15278 return false;
15279
15280 auto UI = LD->use_begin();
15281 while (UI.getUse().getResNo() != 0) ++UI;
15282 SDNode *Trunc = *UI++;
15283 while (UI.getUse().getResNo() != 0) ++UI;
15284 SDNode *RightShift = *UI;
15285 if (Trunc->getOpcode() != ISD::TRUNCATE)
15286 std::swap(Trunc, RightShift);
15287
15288 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15289 Trunc->getValueType(0) != MVT::i32 ||
15290 !Trunc->hasOneUse())
15291 return false;
15292 if (RightShift->getOpcode() != ISD::SRL ||
15293 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15294 RightShift->getConstantOperandVal(1) != 32 ||
15295 !RightShift->hasOneUse())
15296 return false;
15297
15298 SDNode *Trunc2 = *RightShift->use_begin();
15299 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15300 Trunc2->getValueType(0) != MVT::i32 ||
15301 !Trunc2->hasOneUse())
15302 return false;
15303
15304 SDNode *Bitcast = *Trunc->use_begin();
15305 SDNode *Bitcast2 = *Trunc2->use_begin();
15306
15307 if (Bitcast->getOpcode() != ISD::BITCAST ||
15308 Bitcast->getValueType(0) != MVT::f32)
15309 return false;
15310 if (Bitcast2->getOpcode() != ISD::BITCAST ||
15311 Bitcast2->getValueType(0) != MVT::f32)
15312 return false;
15313
15314 if (Subtarget.isLittleEndian())
15315 std::swap(Bitcast, Bitcast2);
15316
15317 // Bitcast has the second float (in memory-layout order) and Bitcast2
15318 // has the first one.
15319
15320 SDValue BasePtr = LD->getBasePtr();
15321 if (LD->isIndexed()) {
15322 assert(LD->getAddressingMode() == ISD::PRE_INC &&
15323 "Non-pre-inc AM on PPC?");
15324 BasePtr =
15325 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15326 LD->getOffset());
15327 }
15328
15329 auto MMOFlags =
15330 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15331 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15332 LD->getPointerInfo(), LD->getAlign(),
15333 MMOFlags, LD->getAAInfo());
15334 SDValue AddPtr =
15335 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15336 BasePtr, DAG.getIntPtrConstant(4, dl));
15337 SDValue FloatLoad2 = DAG.getLoad(
15338 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15339 LD->getPointerInfo().getWithOffset(4),
15340 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15341
15342 if (LD->isIndexed()) {
15343 // Note that DAGCombine should re-form any pre-increment load(s) from
15344 // what is produced here if that makes sense.
15345 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15346 }
15347
15348 DCI.CombineTo(Bitcast2, FloatLoad);
15349 DCI.CombineTo(Bitcast, FloatLoad2);
15350
15351 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15352 SDValue(FloatLoad2.getNode(), 1));
15353 return true;
15354 };
15355
15356 if (ReplaceTwoFloatLoad())
15357 return SDValue(N, 0);
15358
15359 EVT MemVT = LD->getMemoryVT();
15360 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15361 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15362 if (LD->isUnindexed() && VT.isVector() &&
15363 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15364 // P8 and later hardware should just use LOAD.
15365 !Subtarget.hasP8Vector() &&
15366 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15367 VT == MVT::v4f32))) &&
15368 LD->getAlign() < ABIAlignment) {
15369 // This is a type-legal unaligned Altivec load.
15370 SDValue Chain = LD->getChain();
15371 SDValue Ptr = LD->getBasePtr();
15372 bool isLittleEndian = Subtarget.isLittleEndian();
15373
15374 // This implements the loading of unaligned vectors as described in
15375 // the venerable Apple Velocity Engine overview. Specifically:
15376 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15377 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15378 //
15379 // The general idea is to expand a sequence of one or more unaligned
15380 // loads into an alignment-based permutation-control instruction (lvsl
15381 // or lvsr), a series of regular vector loads (which always truncate
15382 // their input address to an aligned address), and a series of
15383 // permutations. The results of these permutations are the requested
15384 // loaded values. The trick is that the last "extra" load is not taken
15385 // from the address you might suspect (sizeof(vector) bytes after the
15386 // last requested load), but rather sizeof(vector) - 1 bytes after the
15387 // last requested vector. The point of this is to avoid a page fault if
15388 // the base address happened to be aligned. This works because if the
15389 // base address is aligned, then adding less than a full vector length
15390 // will cause the last vector in the sequence to be (re)loaded.
15391 // Otherwise, the next vector will be fetched as you might suspect was
15392 // necessary.
15393
15394 // We might be able to reuse the permutation generation from
15395 // a different base address offset from this one by an aligned amount.
15396 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15397 // optimization later.
15398 Intrinsic::ID Intr, IntrLD, IntrPerm;
15399 MVT PermCntlTy, PermTy, LDTy;
15400 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15401 : Intrinsic::ppc_altivec_lvsl;
15402 IntrLD = Intrinsic::ppc_altivec_lvx;
15403 IntrPerm = Intrinsic::ppc_altivec_vperm;
15404 PermCntlTy = MVT::v16i8;
15405 PermTy = MVT::v4i32;
15406 LDTy = MVT::v4i32;
15407
15408 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15409
15410 // Create the new MMO for the new base load. It is like the original MMO,
15411 // but represents an area in memory almost twice the vector size centered
15412 // on the original address. If the address is unaligned, we might start
15413 // reading up to (sizeof(vector)-1) bytes below the address of the
15414 // original unaligned load.
15415      MachineFunction &MF = DAG.getMachineFunction();
15416      MachineMemOperand *BaseMMO =
15417 MF.getMachineMemOperand(LD->getMemOperand(),
15418 -(long)MemVT.getStoreSize()+1,
15419 2*MemVT.getStoreSize()-1);
15420
15421 // Create the new base load.
15422 SDValue LDXIntID =
15423 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15424 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15425 SDValue BaseLoad =
15426        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15427                                DAG.getVTList(PermTy, MVT::Other),
15428 BaseLoadOps, LDTy, BaseMMO);
15429
15430 // Note that the value of IncOffset (which is provided to the next
15431 // load's pointer info offset value, and thus used to calculate the
15432 // alignment), and the value of IncValue (which is actually used to
15433 // increment the pointer value) are different! This is because we
15434 // require the next load to appear to be aligned, even though it
15435 // is actually offset from the base pointer by a lesser amount.
15436 int IncOffset = VT.getSizeInBits() / 8;
15437 int IncValue = IncOffset;
15438
15439 // Walk (both up and down) the chain looking for another load at the real
15440 // (aligned) offset (the alignment of the other load does not matter in
15441 // this case). If found, then do not use the offset reduction trick, as
15442 // that will prevent the loads from being later combined (as they would
15443 // otherwise be duplicates).
15444 if (!findConsecutiveLoad(LD, DAG))
15445 --IncValue;
15446
15447 SDValue Increment =
15448 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15449 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15450
15451 MachineMemOperand *ExtraMMO =
15452 MF.getMachineMemOperand(LD->getMemOperand(),
15453 1, 2*MemVT.getStoreSize()-1);
15454 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15455 SDValue ExtraLoad =
15456        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15457                                DAG.getVTList(PermTy, MVT::Other),
15458 ExtraLoadOps, LDTy, ExtraMMO);
15459
15460      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15461                               BaseLoad.getValue(1), ExtraLoad.getValue(1));
15462
15463 // Because vperm has a big-endian bias, we must reverse the order
15464 // of the input vectors and complement the permute control vector
15465 // when generating little endian code. We have already handled the
15466 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15467 // and ExtraLoad here.
15468 SDValue Perm;
15469 if (isLittleEndian)
15470 Perm = BuildIntrinsicOp(IntrPerm,
15471 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15472 else
15473 Perm = BuildIntrinsicOp(IntrPerm,
15474 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15475
15476 if (VT != PermTy)
15477 Perm = Subtarget.hasAltivec()
15478 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15479 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15480 DAG.getTargetConstant(1, dl, MVT::i64));
15481 // second argument is 1 because this rounding
15482 // is always exact.
15483
15484 // The output of the permutation is our loaded result, the TokenFactor is
15485 // our new chain.
15486 DCI.CombineTo(N, Perm, TF);
15487 return SDValue(N, 0);
15488 }
15489 }
15490 break;
15491  case ISD::INTRINSIC_WO_CHAIN: {
15492    bool isLittleEndian = Subtarget.isLittleEndian();
15493 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15494 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15495 : Intrinsic::ppc_altivec_lvsl);
15496 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15497 SDValue Add = N->getOperand(1);
15498
15499 int Bits = 4 /* 16 byte alignment */;
15500
15501 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15502 APInt::getAllOnes(Bits /* alignment */)
15503 .zext(Add.getScalarValueSizeInBits()))) {
15504 SDNode *BasePtr = Add->getOperand(0).getNode();
15505 for (SDNode *U : BasePtr->uses()) {
15506 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15507 cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15508 // We've found another LVSL/LVSR, and this address is an aligned
15509 // multiple of that one. The results will be the same, so use the
15510 // one we've just found instead.
15511
15512 return SDValue(U, 0);
15513 }
15514 }
15515 }
15516
15517 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15518 SDNode *BasePtr = Add->getOperand(0).getNode();
15519 for (SDNode *U : BasePtr->uses()) {
15520 if (U->getOpcode() == ISD::ADD &&
15521 isa<ConstantSDNode>(U->getOperand(1)) &&
15522 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15523 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15524 (1ULL << Bits) ==
15525 0) {
15526 SDNode *OtherAdd = U;
15527 for (SDNode *V : OtherAdd->uses()) {
15528 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15529 cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15530 IID) {
15531 return SDValue(V, 0);
15532 }
15533 }
15534 }
15535 }
15536 }
15537 }
15538
15539 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15540    // Expose the vabsduw/h/b opportunity for downstream
15541 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15542 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15543 IID == Intrinsic::ppc_altivec_vmaxsh ||
15544 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15545 SDValue V1 = N->getOperand(1);
15546 SDValue V2 = N->getOperand(2);
15547 if ((V1.getSimpleValueType() == MVT::v4i32 ||
15548           V1.getSimpleValueType() == MVT::v8i16 ||
15549           V1.getSimpleValueType() == MVT::v16i8) &&
15550 V1.getSimpleValueType() == V2.getSimpleValueType()) {
15551 // (0-a, a)
15552 if (V1.getOpcode() == ISD::SUB &&
15553            ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15554            V1.getOperand(1) == V2) {
15555 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15556 }
15557 // (a, 0-a)
15558 if (V2.getOpcode() == ISD::SUB &&
15559 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15560 V2.getOperand(1) == V1) {
15561 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15562 }
15563 // (x-y, y-x)
15564 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15565 V1.getOperand(0) == V2.getOperand(1) &&
15566 V1.getOperand(1) == V2.getOperand(0)) {
15567 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15568 }
15569 }
15570 }
15571 }
15572
15573 break;
15574  case ISD::INTRINSIC_W_CHAIN:
15575    // For little endian, VSX loads require generating lxvd2x/xxswapd.
15576 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15577 if (Subtarget.needsSwapsForVSXMemOps()) {
15578 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15579 default:
15580 break;
15581 case Intrinsic::ppc_vsx_lxvw4x:
15582 case Intrinsic::ppc_vsx_lxvd2x:
15583 return expandVSXLoadForLE(N, DCI);
15584 }
15585 }
15586 break;
15587  case ISD::INTRINSIC_VOID:
15588    // For little endian, VSX stores require generating xxswapd/stxvd2x.
15589 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15590 if (Subtarget.needsSwapsForVSXMemOps()) {
15591 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15592 default:
15593 break;
15594 case Intrinsic::ppc_vsx_stxvw4x:
15595 case Intrinsic::ppc_vsx_stxvd2x:
15596 return expandVSXStoreForLE(N, DCI);
15597 }
15598 }
15599 break;
15600 case ISD::BSWAP: {
15601 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15602 // For subtargets without LDBRX, we can still do better than the default
15603 // expansion even for 64-bit BSWAP (LOAD).
15604 bool Is64BitBswapOn64BitTgt =
15605 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
15606 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
15607 N->getOperand(0).hasOneUse();
15608 if (IsSingleUseNormalLd &&
15609 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15610 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15611 SDValue Load = N->getOperand(0);
15612 LoadSDNode *LD = cast<LoadSDNode>(Load);
15613 // Create the byte-swapping load.
15614 SDValue Ops[] = {
15615 LD->getChain(), // Chain
15616 LD->getBasePtr(), // Ptr
15617 DAG.getValueType(N->getValueType(0)) // VT
15618 };
15619 SDValue BSLoad =
15620          DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15621                                  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15622                                                MVT::i64 : MVT::i32, MVT::Other),
15623                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
15624
15625 // If this is an i16 load, insert the truncate.
15626 SDValue ResVal = BSLoad;
15627 if (N->getValueType(0) == MVT::i16)
15628 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15629
15630 // First, combine the bswap away. This makes the value produced by the
15631 // load dead.
15632 DCI.CombineTo(N, ResVal);
15633
15634      // Next, combine the load away; we give it a bogus result value but a
15635      // real chain result. The result value is dead because the bswap is dead.
15636 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15637
15638 // Return N so it doesn't get rechecked!
15639 return SDValue(N, 0);
15640 }
15641 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
15642 // before legalization so that the BUILD_PAIR is handled correctly.
15643 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
15644 !IsSingleUseNormalLd)
15645 return SDValue();
15646 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
15647
15648 // Can't split volatile or atomic loads.
15649 if (!LD->isSimple())
15650 return SDValue();
15651 SDValue BasePtr = LD->getBasePtr();
15652 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
15653 LD->getPointerInfo(), LD->getAlign());
15654 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
15655 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15656 DAG.getIntPtrConstant(4, dl));
15657 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
15658 LD->getMemOperand(), 4, 4);
15659 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
15660 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
15661 SDValue Res;
15662 if (Subtarget.isLittleEndian())
15663 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
15664 else
15665 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
15666 SDValue TF =
15667 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15668 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15669 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
15670 return Res;
15671 }
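// Illustrative sketch (assumption: 64-bit target without LDBRX): an i64
// (bswap (load %p)) is split above into two i32 byte-swapped loads, one at
// %p and one at %p+4, each of which this same combine later turns into a
// lwbrx; BUILD_PAIR then assembles the two halves in endian order.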
15672 case PPCISD::VCMP:
15673 // If a VCMP_rec node already exists with exactly the same operands as this
15674 // node, use its result instead of this node (VCMP_rec computes both a CR6
15675 // and a normal output).
15676 //
15677 if (!N->getOperand(0).hasOneUse() &&
15678 !N->getOperand(1).hasOneUse() &&
15679 !N->getOperand(2).hasOneUse()) {
15680
15681 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15682 SDNode *VCMPrecNode = nullptr;
15683
15684 SDNode *LHSN = N->getOperand(0).getNode();
15685 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15686 UI != E; ++UI)
15687 if (UI->getOpcode() == PPCISD::VCMP_rec &&
15688 UI->getOperand(1) == N->getOperand(1) &&
15689 UI->getOperand(2) == N->getOperand(2) &&
15690 UI->getOperand(0) == N->getOperand(0)) {
15691 VCMPrecNode = *UI;
15692 break;
15693 }
15694
15695 // If there is no VCMP_rec node, or if the flag value has a single use,
15696 // don't transform this.
15697 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15698 break;
15699
15700 // Look at the (necessarily single) use of the flag value. If it has a
15701 // chain, this transformation is more complex. Note that multiple things
15702 // could use the value result, which we should ignore.
15703 SDNode *FlagUser = nullptr;
15704 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15705 FlagUser == nullptr; ++UI) {
15706 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15707 SDNode *User = *UI;
15708 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15709 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15710 FlagUser = User;
15711 break;
15712 }
15713 }
15714 }
15715
15716 // If the user is a MFOCRF instruction, we know this is safe.
15717 // Otherwise we give up for right now.
15718 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15719 return SDValue(VCMPrecNode, 0);
15720 }
15721 break;
15722 case ISD::BRCOND: {
15723 SDValue Cond = N->getOperand(1);
15724 SDValue Target = N->getOperand(2);
15725
15726 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15727 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15728 Intrinsic::loop_decrement) {
15729
15730 // We now need to make the intrinsic dead (it cannot be instruction
15731 // selected).
15732 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15733 assert(Cond.getNode()->hasOneUse() &&
15734 "Counter decrement has more than one use");
15735
15736 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15737 N->getOperand(0), Target);
15738 }
15739 }
15740 break;
15741 case ISD::BR_CC: {
15742 // If this is a branch on an altivec predicate comparison, lower this so
15743 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15744 // lowering is done pre-legalize, because the legalizer lowers the predicate
15745 // compare down to code that is difficult to reassemble.
15746 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15747 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15748
15749 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15750 // value. If so, pass-through the AND to get to the intrinsic.
15751 if (LHS.getOpcode() == ISD::AND &&
15752 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15753 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15754 Intrinsic::loop_decrement &&
15755 isa<ConstantSDNode>(LHS.getOperand(1)) &&
15756 !isNullConstant(LHS.getOperand(1)))
15757 LHS = LHS.getOperand(0);
15758
15759 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15760 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15761 Intrinsic::loop_decrement &&
15762 isa<ConstantSDNode>(RHS)) {
15763 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15764 "Counter decrement comparison is not EQ or NE");
15765
15766 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15767 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15768 (CC == ISD::SETNE && !Val);
15769
15770 // We now need to make the intrinsic dead (it cannot be instruction
15771 // selected).
15772 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15773 assert(LHS.getNode()->hasOneUse() &&
15774 "Counter decrement has more than one use");
15775
15776 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15777 N->getOperand(0), N->getOperand(4));
15778 }
15779
15780 int CompareOpc;
15781 bool isDot;
15782
15783 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15784 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15785 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15786 assert(isDot && "Can't compare against a vector result!");
15787
15788 // If this is a comparison against something other than 0/1, then we know
15789 // that the condition is never/always true.
15790 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15791 if (Val != 0 && Val != 1) {
15792 if (CC == ISD::SETEQ) // Cond never true, remove branch.
15793 return N->getOperand(0);
15794 // Always !=, turn it into an unconditional branch.
15795 return DAG.getNode(ISD::BR, dl, MVT::Other,
15796 N->getOperand(0), N->getOperand(4));
15797 }
15798
15799 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15800
15801 // Create the PPCISD altivec 'dot' comparison node.
15802 SDValue Ops[] = {
15803 LHS.getOperand(2), // LHS of compare
15804 LHS.getOperand(3), // RHS of compare
15805 DAG.getConstant(CompareOpc, dl, MVT::i32)
15806 };
15807 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15808 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15809
15810 // Unpack the result based on how the target uses it.
15811 PPC::Predicate CompOpc;
15812 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15813 default: // Can't happen, don't crash on invalid number though.
15814 case 0: // Branch on the value of the EQ bit of CR6.
15815 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15816 break;
15817 case 1: // Branch on the inverted value of the EQ bit of CR6.
15818 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15819 break;
15820 case 2: // Branch on the value of the LT bit of CR6.
15821 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15822 break;
15823 case 3: // Branch on the inverted value of the LT bit of CR6.
15824 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15825 break;
15826 }
15827
15828 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15829 DAG.getConstant(CompOpc, dl, MVT::i32),
15830 DAG.getRegister(PPC::CR6, MVT::i32),
15831 N->getOperand(4), CompNode.getValue(1));
15832 }
15833 break;
15834 }
15835 case ISD::BUILD_VECTOR:
15836 return DAGCombineBuildVector(N, DCI);
15837 case ISD::ABS:
15838 return combineABS(N, DCI);
15839 case ISD::VSELECT:
15840 return combineVSelect(N, DCI);
15841 }
15842
15843 return SDValue();
15844}
15845
15846SDValue
15847PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15848 SelectionDAG &DAG,
15849 SmallVectorImpl<SDNode *> &Created) const {
15850 // fold (sdiv X, pow2)
15851 EVT VT = N->getValueType(0);
15852 if (VT == MVT::i64 && !Subtarget.isPPC64())
15853 return SDValue();
15854 if ((VT != MVT::i32 && VT != MVT::i64) ||
15855 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
15856 return SDValue();
15857
15858 SDLoc DL(N);
15859 SDValue N0 = N->getOperand(0);
15860
15861 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
15862 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15863 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15864
15865 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15866 Created.push_back(Op.getNode());
15867
15868 if (IsNegPow2) {
15869 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15870 Created.push_back(Op.getNode());
15871 }
15872
15873 return Op;
15874}
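// As a concrete sketch, sdiv i32 %x, 4 becomes PPCISD::SRA_ADDZE, which
// corresponds to the classic two-instruction sequence (roughly):
//   srawi r3, r3, 2   ; arithmetic shift; CA records shifted-out bits of a
//                     ; negative dividend
//   addze r3, r3      ; add carry to round the quotient toward zero
// For a negated power of two, a final negation (the ISD::SUB above) follows.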
15875
15876//===----------------------------------------------------------------------===//
15877// Inline Assembly Support
15878//===----------------------------------------------------------------------===//
15879
15880void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15881 KnownBits &Known,
15882 const APInt &DemandedElts,
15883 const SelectionDAG &DAG,
15884 unsigned Depth) const {
15885 Known.resetAll();
15886 switch (Op.getOpcode()) {
15887 default: break;
15888 case PPCISD::LBRX: {
15889 // lhbrx is known to have the top bits cleared out.
15890 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15891 Known.Zero = 0xFFFF0000;
15892 break;
15893 }
15894 case ISD::INTRINSIC_WO_CHAIN: {
15895 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15896 default: break;
15897 case Intrinsic::ppc_altivec_vcmpbfp_p:
15898 case Intrinsic::ppc_altivec_vcmpeqfp_p:
15899 case Intrinsic::ppc_altivec_vcmpequb_p:
15900 case Intrinsic::ppc_altivec_vcmpequh_p:
15901 case Intrinsic::ppc_altivec_vcmpequw_p:
15902 case Intrinsic::ppc_altivec_vcmpequd_p:
15903 case Intrinsic::ppc_altivec_vcmpequq_p:
15904 case Intrinsic::ppc_altivec_vcmpgefp_p:
15905 case Intrinsic::ppc_altivec_vcmpgtfp_p:
15906 case Intrinsic::ppc_altivec_vcmpgtsb_p:
15907 case Intrinsic::ppc_altivec_vcmpgtsh_p:
15908 case Intrinsic::ppc_altivec_vcmpgtsw_p:
15909 case Intrinsic::ppc_altivec_vcmpgtsd_p:
15910 case Intrinsic::ppc_altivec_vcmpgtsq_p:
15911 case Intrinsic::ppc_altivec_vcmpgtub_p:
15912 case Intrinsic::ppc_altivec_vcmpgtuh_p:
15913 case Intrinsic::ppc_altivec_vcmpgtuw_p:
15914 case Intrinsic::ppc_altivec_vcmpgtud_p:
15915 case Intrinsic::ppc_altivec_vcmpgtuq_p:
15916 Known.Zero = ~1U; // All bits but the low one are known to be zero.
15917 break;
15918 }
15919 break;
15920 }
15921 case ISD::INTRINSIC_W_CHAIN: {
15922 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
15923 default:
15924 break;
15925 case Intrinsic::ppc_load2r:
15926 // Top bits are cleared for load2r (which is the same as lhbrx).
15927 Known.Zero = 0xFFFF0000;
15928 break;
15929 }
15930 break;
15931 }
15932 }
15933}
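// For instance, after an i16 PPCISD::LBRX (lhbrx) the combiner can assume
// the top halfword is zero (Known.Zero = 0xFFFF0000 above), because the
// byte-reversed halfword is zero-extended into the full register; the
// AltiVec predicate intrinsics likewise produce only 0 or 1.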
15934
15935Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15936 switch (Subtarget.getCPUDirective()) {
15937 default: break;
15938 case PPC::DIR_970:
15939 case PPC::DIR_PWR4:
15940 case PPC::DIR_PWR5:
15941 case PPC::DIR_PWR5X:
15942 case PPC::DIR_PWR6:
15943 case PPC::DIR_PWR6X:
15944 case PPC::DIR_PWR7:
15945 case PPC::DIR_PWR8:
15946 case PPC::DIR_PWR9:
15947 case PPC::DIR_PWR10:
15948 case PPC::DIR_PWR_FUTURE: {
15949 if (!ML)
15950 break;
15951
15952 if (!DisableInnermostLoopAlign32) {
15953 // If the nested loop is an innermost loop, prefer a 32-byte alignment
15954 // so that we can decrease cache misses and branch-prediction misses.
15955 // Actual alignment of the loop will depend on the hotness check and other
15956 // logic in alignBlocks.
15957 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15958 return Align(32);
15959 }
15960
15961 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15962
15963 // For small loops (between 5 and 8 instructions), align to a 32-byte
15964 // boundary so that the entire loop fits in one instruction-cache line.
15965 uint64_t LoopSize = 0;
15966 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15967 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15968 LoopSize += TII->getInstSizeInBytes(*J);
15969 if (LoopSize > 32)
15970 break;
15971 }
15972
15973 if (LoopSize > 16 && LoopSize <= 32)
15974 return Align(32);
15975
15976 break;
15977 }
15978 }
15979
15980 return TargetLowering::getPrefLoopAlignment(ML);
15981}
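// Sketch of the small-loop heuristic above: a 7-instruction loop body is
// 28 bytes, so loops in the (16, 32] byte range get Align(32) and are
// guaranteed to sit within a single 32-byte instruction-fetch block
// instead of straddling two.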
15982
15983/// getConstraintType - Given a constraint, return the type of
15984/// constraint it is for this target.
15985PPCTargetLowering::ConstraintType
15986PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15987 if (Constraint.size() == 1) {
15988 switch (Constraint[0]) {
15989 default: break;
15990 case 'b':
15991 case 'r':
15992 case 'f':
15993 case 'd':
15994 case 'v':
15995 case 'y':
15996 return C_RegisterClass;
15997 case 'Z':
15998 // FIXME: While Z does indicate a memory constraint, it specifically
15999 // indicates an r+r address (used in conjunction with the 'y' modifier
16000 // in the replacement string). Currently, we're forcing the base
16001 // register to be r0 in the asm printer (which is interpreted as zero)
16002 // and forming the complete address in the second register. This is
16003 // suboptimal.
16004 return C_Memory;
16005 }
16006 } else if (Constraint == "wc") { // individual CR bits.
16007 return C_RegisterClass;
16008 } else if (Constraint == "wa" || Constraint == "wd" ||
16009 Constraint == "wf" || Constraint == "ws" ||
16010 Constraint == "wi" || Constraint == "ww") {
16011 return C_RegisterClass; // VSX registers.
16012 }
16013 return TargetLowering::getConstraintType(Constraint);
16014}
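// Usage sketch (illustrative, not from this file): the 'Z' constraint is
// typically paired with the 'y' output modifier so the asm printer emits
// an r+r memory operand, e.g.
//   __asm__("lxvd2x %x0, %y1" : "=wa"(v) : "Z"(*p));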
16015
16016/// Examine constraint type and operand type and determine a weight value.
16017/// This object must already have been set up with the operand type
16018/// and the current alternative constraint selected.
16019TargetLowering::ConstraintWeight
16020PPCTargetLowering::getSingleConstraintMatchWeight(
16021 AsmOperandInfo &info, const char *constraint) const {
16022 ConstraintWeight weight = CW_Invalid;
16023 Value *CallOperandVal = info.CallOperandVal;
16024 // If we don't have a value, we can't do a match,
16025 // but allow it at the lowest weight.
16026 if (!CallOperandVal)
16027 return CW_Default;
16028 Type *type = CallOperandVal->getType();
16029
16030 // Look at the constraint type.
16031 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16032 return CW_Register; // an individual CR bit.
16033 else if ((StringRef(constraint) == "wa" ||
16034 StringRef(constraint) == "wd" ||
16035 StringRef(constraint) == "wf") &&
16036 type->isVectorTy())
16037 return CW_Register;
16038 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16039 return CW_Register; // registers that just hold 64-bit integer data.
16040 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16041 return CW_Register;
16042 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16043 return CW_Register;
16044
16045 switch (*constraint) {
16046 default:
16047 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16048 break;
16049 case 'b':
16050 if (type->isIntegerTy())
16051 weight = CW_Register;
16052 break;
16053 case 'f':
16054 if (type->isFloatTy())
16055 weight = CW_Register;
16056 break;
16057 case 'd':
16058 if (type->isDoubleTy())
16059 weight = CW_Register;
16060 break;
16061 case 'v':
16062 if (type->isVectorTy())
16063 weight = CW_Register;
16064 break;
16065 case 'y':
16066 weight = CW_Register;
16067 break;
16068 case 'Z':
16069 weight = CW_Memory;
16070 break;
16071 }
16072 return weight;
16073}
16074
16075std::pair<unsigned, const TargetRegisterClass *>
16076PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16077 StringRef Constraint,
16078 MVT VT) const {
16079 if (Constraint.size() == 1) {
16080 // GCC RS6000 Constraint Letters
16081 switch (Constraint[0]) {
16082 case 'b': // R1-R31
16083 if (VT == MVT::i64 && Subtarget.isPPC64())
16084 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16085 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16086 case 'r': // R0-R31
16087 if (VT == MVT::i64 && Subtarget.isPPC64())
16088 return std::make_pair(0U, &PPC::G8RCRegClass);
16089 return std::make_pair(0U, &PPC::GPRCRegClass);
16090 // 'd' and 'f' constraints are both defined to be "the floating point
16091 // registers", where one is for 32-bit and the other for 64-bit. We don't
16092 // really care overly much here so just give them all the same reg classes.
16093 case 'd':
16094 case 'f':
16095 if (Subtarget.hasSPE()) {
16096 if (VT == MVT::f32 || VT == MVT::i32)
16097 return std::make_pair(0U, &PPC::GPRCRegClass);
16098 if (VT == MVT::f64 || VT == MVT::i64)
16099 return std::make_pair(0U, &PPC::SPERCRegClass);
16100 } else {
16101 if (VT == MVT::f32 || VT == MVT::i32)
16102 return std::make_pair(0U, &PPC::F4RCRegClass);
16103 if (VT == MVT::f64 || VT == MVT::i64)
16104 return std::make_pair(0U, &PPC::F8RCRegClass);
16105 }
16106 break;
16107 case 'v':
16108 if (Subtarget.hasAltivec() && VT.isVector())
16109 return std::make_pair(0U, &PPC::VRRCRegClass);
16110 else if (Subtarget.hasVSX())
16111 // Scalars in Altivec registers only make sense with VSX.
16112 return std::make_pair(0U, &PPC::VFRCRegClass);
16113 break;
16114 case 'y': // crrc
16115 return std::make_pair(0U, &PPC::CRRCRegClass);
16116 }
16117 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16118 // An individual CR bit.
16119 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16120 } else if ((Constraint == "wa" || Constraint == "wd" ||
16121 Constraint == "wf" || Constraint == "wi") &&
16122 Subtarget.hasVSX()) {
16123 // A VSX register for either a scalar (FP) or vector. There is no
16124 // support for single precision scalars on subtargets prior to Power8.
16125 if (VT.isVector())
16126 return std::make_pair(0U, &PPC::VSRCRegClass);
16127 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16128 return std::make_pair(0U, &PPC::VSSRCRegClass);
16129 return std::make_pair(0U, &PPC::VSFRCRegClass);
16130 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16131 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16132 return std::make_pair(0U, &PPC::VSSRCRegClass);
16133 else
16134 return std::make_pair(0U, &PPC::VSFRCRegClass);
16135 } else if (Constraint == "lr") {
16136 if (VT == MVT::i64)
16137 return std::make_pair(0U, &PPC::LR8RCRegClass);
16138 else
16139 return std::make_pair(0U, &PPC::LRRCRegClass);
16140 }
16141
16142 // Handle special cases of physical registers that are not properly handled
16143 // by the base class.
16144 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16145 // If we name a VSX register, we can't defer to the base class because it
16146 // will not recognize the correct register (their names will be VSL{0-31}
16147 // and V{0-31} so they won't match). So we match them here.
16148 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16149 int VSNum = atoi(Constraint.data() + 3);
16150 assert(VSNum >= 0 && VSNum <= 63 &&
16151 "Attempted to access a vsr out of range");
16152 if (VSNum < 32)
16153 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16154 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16155 }
16156
16157 // For float registers, we can't defer to the base class as it will match
16158 // the SPILLTOVSRRC class.
16159 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16160 int RegNum = atoi(Constraint.data() + 2);
16161 if (RegNum > 31 || RegNum < 0)
16162 report_fatal_error("Invalid floating point register number");
16163 if (VT == MVT::f32 || VT == MVT::i32)
16164 return Subtarget.hasSPE()
16165 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16166 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16167 if (VT == MVT::f64 || VT == MVT::i64)
16168 return Subtarget.hasSPE()
16169 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16170 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16171 }
16172 }
16173
16174 std::pair<unsigned, const TargetRegisterClass *> R =
16175 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16176
16177 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16178 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16179 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16180 // register.
16181 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16182 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16183 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16184 PPC::GPRCRegClass.contains(R.first))
16185 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16186 PPC::sub_32, &PPC::G8RCRegClass),
16187 &PPC::G8RCRegClass);
16188
16189 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16190 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16191 R.first = PPC::CR0;
16192 R.second = &PPC::CRRCRegClass;
16193 }
16194 // FIXME: This warning should ideally be emitted in the front end.
16195 const auto &TM = getTargetMachine();
16196 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16197 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16198 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16199 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16200 errs() << "warning: vector registers 20 to 32 are reserved in the "
16201 "default AIX AltiVec ABI and cannot be used\n";
16202 }
16203
16204 return R;
16205}
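// Example of the VSX special case above (sketch): a "{vs35}" operand maps
// to PPC::V3 in VSRCRegClass, because VSX registers 32-63 overlay the
// AltiVec V registers while 0-31 overlay the FP/VSL bank.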
16206
16207/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16208/// vector. If it is invalid, don't add anything to Ops.
16209void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16210 std::string &Constraint,
16211 std::vector<SDValue>&Ops,
16212 SelectionDAG &DAG) const {
16213 SDValue Result;
16214
16215 // Only support length 1 constraints.
16216 if (Constraint.length() > 1) return;
16217
16218 char Letter = Constraint[0];
16219 switch (Letter) {
16220 default: break;
16221 case 'I':
16222 case 'J':
16223 case 'K':
16224 case 'L':
16225 case 'M':
16226 case 'N':
16227 case 'O':
16228 case 'P': {
16229 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16230 if (!CST) return; // Must be an immediate to match.
16231 SDLoc dl(Op);
16232 int64_t Value = CST->getSExtValue();
16233 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16234 // numbers are printed as such.
16235 switch (Letter) {
16236 default: llvm_unreachable("Unknown constraint letter!");
16237 case 'I': // "I" is a signed 16-bit constant.
16238 if (isInt<16>(Value))
16239 Result = DAG.getTargetConstant(Value, dl, TCVT);
16240 break;
16241 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16242 if (isShiftedUInt<16, 16>(Value))
16243 Result = DAG.getTargetConstant(Value, dl, TCVT);
16244 break;
16245 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16246 if (isShiftedInt<16, 16>(Value))
16247 Result = DAG.getTargetConstant(Value, dl, TCVT);
16248 break;
16249 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16250 if (isUInt<16>(Value))
16251 Result = DAG.getTargetConstant(Value, dl, TCVT);
16252 break;
16253 case 'M': // "M" is a constant that is greater than 31.
16254 if (Value > 31)
16255 Result = DAG.getTargetConstant(Value, dl, TCVT);
16256 break;
16257 case 'N': // "N" is a positive constant that is an exact power of two.
16258 if (Value > 0 && isPowerOf2_64(Value))
16259 Result = DAG.getTargetConstant(Value, dl, TCVT);
16260 break;
16261 case 'O': // "O" is the constant zero.
16262 if (Value == 0)
16263 Result = DAG.getTargetConstant(Value, dl, TCVT);
16264 break;
16265 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16266 if (isInt<16>(-Value))
16267 Result = DAG.getTargetConstant(Value, dl, TCVT);
16268 break;
16269 }
16270 break;
16271 }
16272 }
16273
16274 if (Result.getNode()) {
16275 Ops.push_back(Result);
16276 return;
16277 }
16278
16279 // Handle standard constraint letters.
16280 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16281}
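// Illustrative use of the letters above (hypothetical snippet):
//   __asm__("addi %0,%1,%2" : "=r"(d) : "b"(s), "I"(42));
// Here 42 satisfies 'I' (signed 16-bit); a value such as 70000 would not
// match and would be left to the generic constraint handling.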
16282
16283// isLegalAddressingMode - Return true if the addressing mode represented
16284// by AM is legal for this target, for a load/store of the specified type.
16285bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
16286 const AddrMode &AM, Type *Ty,
16287 unsigned AS,
16288 Instruction *I) const {
16289 // The vector-type r+i form has been supported since Power9, as the DQ form.
16290 // We do not check the DQ-form offset requirement (off % 16 == 0) here,
16291 // because on PowerPC the immediate form is preferred and the offset can be
16292 // adjusted to use the immediate form later, in the PPCLoopInstrFormPrep
16293 // pass. Also, LSR checks a legal addressing mode using one LSRUse's min and
16294 // max offsets, so we should be a little aggressive and admit other offsets.
16295 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16296 return false;
16297
16298 // PPC allows a sign-extended 16-bit immediate field.
16299 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16300 return false;
16301
16302 // No global is ever allowed as a base.
16303 if (AM.BaseGV)
16304 return false;
16305
16306 // PPC only supports r+r addressing:
16307 switch (AM.Scale) {
16308 case 0: // "r+i" or just "i", depending on HasBaseReg.
16309 break;
16310 case 1:
16311 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
16312 return false;
16313 // Otherwise we have r+r or r+i.
16314 break;
16315 case 2:
16316 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
16317 return false;
16318 // Allow 2*r as r+r.
16319 break;
16320 default:
16321 // No other scales are supported.
16322 return false;
16323 }
16324
16325 return true;
16326}
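// Concretely (sketch): for an i32 load this accepts the D-form address in
// "lwz r3, 8(r4)" (Scale 0 with a 16-bit offset) and the X-form address in
// "lwzx r3, r4, r5" (Scale 1, no offset), while a scaled mode such as 4*r
// falls through to the default case and is rejected.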
16327
16328SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
16329 SelectionDAG &DAG) const {
16330 MachineFunction &MF = DAG.getMachineFunction();
16331 MachineFrameInfo &MFI = MF.getFrameInfo();
16332 MFI.setReturnAddressIsTaken(true);
16333
16334 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
16335 return SDValue();
16336
16337 SDLoc dl(Op);
16338 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16339
16340 // Make sure the function does not optimize away the store of the RA to
16341 // the stack.
16342 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
16343 FuncInfo->setLRStoreRequired();
16344 bool isPPC64 = Subtarget.isPPC64();
16345 auto PtrVT = getPointerTy(MF.getDataLayout());
16346
16347 if (Depth > 0) {
16348 // The link register (return address) is saved in the caller's frame
16349 // not the callee's stack frame. So we must get the caller's frame
16350 // address and load the return address at the LR offset from there.
16351 SDValue FrameAddr =
16352 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16353 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
16354 SDValue Offset =
16355 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
16356 isPPC64 ? MVT::i64 : MVT::i32);
16357 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
16358 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
16359 MachinePointerInfo());
16360 }
16361
16362 // Just load the return address off the stack.
16363 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16364 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
16365 MachinePointerInfo());
16366}
16367
16368SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16369 SelectionDAG &DAG) const {
16370 SDLoc dl(Op);
16371 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16372
16373 MachineFunction &MF = DAG.getMachineFunction();
16374 MachineFrameInfo &MFI = MF.getFrameInfo();
16375 MFI.setFrameAddressIsTaken(true);
16376
16377 EVT PtrVT = getPointerTy(MF.getDataLayout());
16378 bool isPPC64 = PtrVT == MVT::i64;
16379
16380 // Naked functions never have a frame pointer, and so we use r1. For all
16381 // other functions, this decision must be delayed until during PEI.
16382 unsigned FrameReg;
16383 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16384 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16385 else
16386 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16387
16388 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16389 PtrVT);
16390 while (Depth--)
16391 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16392 FrameAddr, MachinePointerInfo());
16393 return FrameAddr;
16394}
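// Example (illustrative): __builtin_return_address(1) takes the Depth > 0
// path above: load the caller's back-chain from the frame pointer, then
// load the saved LR at the ABI's return-save offset within that frame,
// e.g. offset 16 on 64-bit ELF.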
16395
16396// FIXME? Maybe this could be a TableGen attribute on some registers and
16397// this table could be generated automatically from RegInfo.
16398Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
16399 const MachineFunction &MF) const {
16400 bool isPPC64 = Subtarget.isPPC64();
16401
16402 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16403 if (!is64Bit && VT != LLT::scalar(32))
16404 report_fatal_error("Invalid register global variable type");
16405
16406 Register Reg = StringSwitch<Register>(RegName)
16407 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16408 .Case("r2", isPPC64 ? Register() : PPC::R2)
16409 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16410 .Default(Register());
16411
16412 if (Reg)
16413 return Reg;
16414 report_fatal_error("Invalid register name global variable");
16415}
16416
16417bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
16418 // The 32-bit SVR4 ABI accesses everything as got-indirect.
16419 if (Subtarget.is32BitELFABI())
16420 return true;
16421
16422 // AIX accesses everything indirectly through the TOC, which is similar to
16423 // the GOT.
16424 if (Subtarget.isAIXABI())
16425 return true;
16426
16427 CodeModel::Model CModel = getTargetMachine().getCodeModel();
16428 // If it is small or large code model, module locals are accessed
16429 // indirectly by loading their address from .toc/.got.
16430 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16431 return true;
16432
16433 // JumpTable and BlockAddress are accessed as got-indirect.
16434 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16435 return true;
16436
16437 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16438 return Subtarget.isGVIndirectSymbol(G->getGlobal());
16439
16440 return false;
16441}
16442
16443bool
16444PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16445 // The PowerPC target isn't yet aware of offsets.
16446 return false;
16447}
16448
16449bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16450 const CallInst &I,
16451 MachineFunction &MF,
16452 unsigned Intrinsic) const {
16453 switch (Intrinsic) {
16454 case Intrinsic::ppc_atomicrmw_xchg_i128:
16455 case Intrinsic::ppc_atomicrmw_add_i128:
16456 case Intrinsic::ppc_atomicrmw_sub_i128:
16457 case Intrinsic::ppc_atomicrmw_nand_i128:
16458 case Intrinsic::ppc_atomicrmw_and_i128:
16459 case Intrinsic::ppc_atomicrmw_or_i128:
16460 case Intrinsic::ppc_atomicrmw_xor_i128:
16461 case Intrinsic::ppc_cmpxchg_i128:
16462 Info.opc = ISD::INTRINSIC_W_CHAIN;
16463 Info.memVT = MVT::i128;
16464 Info.ptrVal = I.getArgOperand(0);
16465 Info.offset = 0;
16466 Info.align = Align(16);
16467 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
16468 MachineMemOperand::MOVolatile;
16469 return true;
16470 case Intrinsic::ppc_atomic_load_i128:
16471 Info.opc = ISD::INTRINSIC_W_CHAIN;
16472 Info.memVT = MVT::i128;
16473 Info.ptrVal = I.getArgOperand(0);
16474 Info.offset = 0;
16475 Info.align = Align(16);
16476 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
16477 return true;
16478 case Intrinsic::ppc_atomic_store_i128:
16479 Info.opc = ISD::INTRINSIC_VOID;
16480 Info.memVT = MVT::i128;
16481 Info.ptrVal = I.getArgOperand(2);
16482 Info.offset = 0;
16483 Info.align = Align(16);
16484 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
16485 return true;
16486 case Intrinsic::ppc_altivec_lvx:
16487 case Intrinsic::ppc_altivec_lvxl:
16488 case Intrinsic::ppc_altivec_lvebx:
16489 case Intrinsic::ppc_altivec_lvehx:
16490 case Intrinsic::ppc_altivec_lvewx:
16491 case Intrinsic::ppc_vsx_lxvd2x:
16492 case Intrinsic::ppc_vsx_lxvw4x:
16493 case Intrinsic::ppc_vsx_lxvd2x_be:
16494 case Intrinsic::ppc_vsx_lxvw4x_be:
16495 case Intrinsic::ppc_vsx_lxvl:
16496 case Intrinsic::ppc_vsx_lxvll: {
16497 EVT VT;
16498 switch (Intrinsic) {
16499 case Intrinsic::ppc_altivec_lvebx:
16500 VT = MVT::i8;
16501 break;
16502 case Intrinsic::ppc_altivec_lvehx:
16503 VT = MVT::i16;
16504 break;
16505 case Intrinsic::ppc_altivec_lvewx:
16506 VT = MVT::i32;
16507 break;
16508 case Intrinsic::ppc_vsx_lxvd2x:
16509 case Intrinsic::ppc_vsx_lxvd2x_be:
16510 VT = MVT::v2f64;
16511 break;
16512 default:
16513 VT = MVT::v4i32;
16514 break;
16515 }
16516
16517 Info.opc = ISD::INTRINSIC_W_CHAIN;
16518 Info.memVT = VT;
16519 Info.ptrVal = I.getArgOperand(0);
16520 Info.offset = -VT.getStoreSize()+1;
16521 Info.size = 2*VT.getStoreSize()-1;
16522 Info.align = Align(1);
16523 Info.flags = MachineMemOperand::MOLoad;
16524 return true;
16525 }
16526 case Intrinsic::ppc_altivec_stvx:
16527 case Intrinsic::ppc_altivec_stvxl:
16528 case Intrinsic::ppc_altivec_stvebx:
16529 case Intrinsic::ppc_altivec_stvehx:
16530 case Intrinsic::ppc_altivec_stvewx:
16531 case Intrinsic::ppc_vsx_stxvd2x:
16532 case Intrinsic::ppc_vsx_stxvw4x:
16533 case Intrinsic::ppc_vsx_stxvd2x_be:
16534 case Intrinsic::ppc_vsx_stxvw4x_be:
16535 case Intrinsic::ppc_vsx_stxvl:
16536 case Intrinsic::ppc_vsx_stxvll: {
16537 EVT VT;
16538 switch (Intrinsic) {
16539 case Intrinsic::ppc_altivec_stvebx:
16540 VT = MVT::i8;
16541 break;
16542 case Intrinsic::ppc_altivec_stvehx:
16543 VT = MVT::i16;
16544 break;
16545 case Intrinsic::ppc_altivec_stvewx:
16546 VT = MVT::i32;
16547 break;
16548 case Intrinsic::ppc_vsx_stxvd2x:
16549 case Intrinsic::ppc_vsx_stxvd2x_be:
16550 VT = MVT::v2f64;
16551 break;
16552 default:
16553 VT = MVT::v4i32;
16554 break;
16555 }
16556
16557 Info.opc = ISD::INTRINSIC_VOID;
16558 Info.memVT = VT;
16559 Info.ptrVal = I.getArgOperand(1);
16560 Info.offset = -VT.getStoreSize()+1;
16561 Info.size = 2*VT.getStoreSize()-1;
16562 Info.align = Align(1);
16563 Info.flags = MachineMemOperand::MOStore;
16564 return true;
16565 }
16566 default:
16567 break;
16568 }
16569
16570 return false;
16571}
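// Note the conservative window used above for lvx-style accesses: lvx
// ignores the low address bits, so for a 16-byte vector the model is a
// byte range of [-15, +15] around the pointer (offset = -15, size = 31)
// rather than an exact 16-byte access.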
16572
16573/// It returns EVT::Other if the type should be determined using generic
16574/// target-independent logic.
16575EVT PPCTargetLowering::getOptimalMemOpType(
16576 const MemOp &Op, const AttributeList &FuncAttributes) const {
16577 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16578 // We should use Altivec/VSX loads and stores when available. For unaligned
16579 // addresses, unaligned VSX loads are only fast starting with the P8.
16580 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16581 (Op.isAligned(Align(16)) ||
16582 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16583 return MVT::v4i32;
16584 }
16585
16586 if (Subtarget.isPPC64()) {
16587 return MVT::i64;
16588 }
16589
16590 return MVT::i32;
16591}
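// For example (sketch): an aligned 32-byte memcpy on a VSX subtarget is
// lowered as two v4i32 vector loads/stores, whereas without AltiVec the
// expansion falls back to i64 (on PPC64) or i32 chunks.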
16592
16593/// Returns true if it is beneficial to convert a load of a constant
16594/// to just the constant itself.
16595bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16596 Type *Ty) const {
16597 assert(Ty->isIntegerTy());
16598
16599 unsigned BitSize = Ty->getPrimitiveSizeInBits();
16600 return !(BitSize == 0 || BitSize > 64);
16601}
16602
16603bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
16604 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16605 return false;
16606 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16607 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16608 return NumBits1 == 64 && NumBits2 == 32;
16609}
16610
16611bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
16612 if (!VT1.isInteger() || !VT2.isInteger())
16613 return false;
16614 unsigned NumBits1 = VT1.getSizeInBits();
16615 unsigned NumBits2 = VT2.getSizeInBits();
16616 return NumBits1 == 64 && NumBits2 == 32;
16617}
16618
16619bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16620 // Generally speaking, zexts are not free, but they are free when they can be
16621 // folded with other operations.
16622 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16623 EVT MemVT = LD->getMemoryVT();
16624 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16625 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16626 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16627 LD->getExtensionType() == ISD::ZEXTLOAD))
16628 return true;
16629 }
16630
16631 // FIXME: Add other cases...
16632 // - 32-bit shifts with a zext to i64
16633 // - zext after ctlz, bswap, etc.
16634 // - zext after and by a constant mask
16635
16636 return TargetLowering::isZExtFree(Val, VT2);
16637}
16638
16639bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16640 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16641 "invalid fpext types");
16642 // Extending to float128 is not free.
16643 if (DestVT == MVT::f128)
16644 return false;
16645 return true;
16646}
16647
16648bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16649 return isInt<16>(Imm) || isUInt<16>(Imm);
16650}
16651
16652bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16653 return isInt<16>(Imm) || isUInt<16>(Imm);
16654}
16655
16656bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
16657 MachineMemOperand::Flags,
16658 bool *Fast) const {
16659 if (DisablePPCUnaligned)
16660 return false;
16661
16662 // PowerPC supports unaligned memory access for simple non-vector types.
16663 // Although accessing unaligned addresses is not as efficient as accessing
16664 // aligned addresses, it is generally more efficient than manual expansion,
16665 // and generally only traps for software emulation when crossing page
16666 // boundaries.
16667
16668 if (!VT.isSimple())
16669 return false;
16670
16671 if (VT.isFloatingPoint() && !VT.isVector() &&
16672 !Subtarget.allowsUnalignedFPAccess())
16673 return false;
16674
16675 if (VT.getSimpleVT().isVector()) {
16676 if (Subtarget.hasVSX()) {
16677 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16678 VT != MVT::v4f32 && VT != MVT::v4i32)
16679 return false;
16680 } else {
16681 return false;
16682 }
16683 }
16684
16685 if (VT == MVT::ppcf128)
16686 return false;
16687
16688 if (Fast)
16689 *Fast = true;
16690
16691 return true;
16692}
16693
16695 SDValue C) const {
16696 // Check integral scalar types.
16697 if (!VT.isScalarInteger())
16698 return false;
16699 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16700 if (!ConstNode->getAPIntValue().isSignedIntN(64))
16701 return false;
16702 // This transformation will generate >= 2 operations. But the following
16703 // cases will generate <= 2 instructions during ISEL. So exclude them.
16704 // 1. If the constant multiplier fits 16 bits, it can be handled by one
16705 // HW instruction, i.e. MULLI.
16706 // 2. If the multiplier fits 16 bits after shifting out trailing zeros, one
16707 // more instruction is needed than in case 1, i.e. MULLI plus RLDICR.
16708 int64_t Imm = ConstNode->getSExtValue();
16709 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16710 Imm >>= Shift;
16711 if (isInt<16>(Imm))
16712 return false;
16713 uint64_t UImm = static_cast<uint64_t>(Imm);
16714 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16715 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16716 return true;
16717 }
16718 return false;
16719}
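// Worked case (sketch): 65537 = 0x10001 has no trailing zeros and does not
// fit in 16 bits, but 65537 - 1 is a power of two, so the multiply is
// decomposed into shift + add instead of staying a hardware multiply.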
16720
16726
16727bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16728 Type *Ty) const {
16729 switch (Ty->getScalarType()->getTypeID()) {
16730 case Type::FloatTyID:
16731 case Type::DoubleTyID:
16732 return true;
16733 case Type::FP128TyID:
16734 return Subtarget.hasP9Vector();
16735 default:
16736 return false;
16737 }
16738}
16739
16740// FIXME: add more patterns which are not profitable to hoist.
16741bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16742 if (!I->hasOneUse())
16743 return true;
16744
16745 Instruction *User = I->user_back();
16746 assert(User && "A single use instruction with no uses.");
16747
16748 switch (I->getOpcode()) {
16749 case Instruction::FMul: {
16750 // Don't break FMA, PowerPC prefers FMA.
16751 if (User->getOpcode() != Instruction::FSub &&
16752 User->getOpcode() != Instruction::FAdd)
16753 return true;
16754
16755 const TargetOptions &Options = getTargetMachine().Options;
16756 const Function *F = I->getFunction();
16757 const DataLayout &DL = F->getParent()->getDataLayout();
16758 Type *Ty = User->getOperand(0)->getType();
16759
16760 return !(
16761 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16762 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16763 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16764 }
16765 case Instruction::Load: {
16766 // Don't break "store (load float*)" pattern, this pattern will be combined
16767 // to "store (load int32)" in later InstCombine pass. See function
16768 // combineLoadToOperationType. On PowerPC, loading a floating-point value
16769 // takes more cycles than loading a 32-bit integer.
16770 LoadInst *LI = cast<LoadInst>(I);
16771 // For the loads that combineLoadToOperationType does nothing, like
16772 // ordered load, it should be profitable to hoist them.
16773 // For swifterror load, it can only be used for pointer to pointer type, so
16774 // later type check should get rid of this case.
16775 if (!LI->isUnordered())
16776 return true;
16777
16778 if (User->getOpcode() != Instruction::Store)
16779 return true;
16780
16781 if (I->getType()->getTypeID() != Type::FloatTyID)
16782 return true;
16783
16784 return false;
16785 }
16786 default:
16787 return true;
16788 }
16789 return true;
16790}
16791
16792const MCPhysReg *
16793PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16794 // LR is a callee-save register, but we must treat it as clobbered by any call
16795 // site. Hence we include LR in the scratch registers, which are in turn added
16796 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16797 // to CTR, which is used by any indirect call.
16798 static const MCPhysReg ScratchRegs[] = {
16799 PPC::X12, PPC::LR8, PPC::CTR8, 0
16800 };
16801
16802 return ScratchRegs;
16803}
16804
16805Register PPCTargetLowering::getExceptionPointerRegister(
16806 const Constant *PersonalityFn) const {
16807 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16808}
16809
16810Register PPCTargetLowering::getExceptionSelectorRegister(
16811 const Constant *PersonalityFn) const {
16812 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16813}
16814
16815bool
16816PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16817 EVT VT, unsigned DefinedValues) const {
16818 if (VT == MVT::v2i64)
16819 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16820
16821 if (Subtarget.hasVSX())
16822 return true;
16823
16824 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16825}
16826
16827Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16828 if (DisableILPPref || Subtarget.enableMachineScheduler())
16829 return TargetLowering::getSchedulingPreference(N);
16830
16831 return Sched::ILP;
16832}
16833
16834// Create a fast isel object.
16835FastISel *
16836PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16837 const TargetLibraryInfo *LibInfo) const {
16838 return PPC::createFastISel(FuncInfo, LibInfo);
16839}
16840
16841// 'Inverted' means the FMA opcode after negating one multiplicand.
16842// For example, (fma -a b c) = (fnmsub a b c)
16843static unsigned invertFMAOpcode(unsigned Opc) {
16844 switch (Opc) {
16845 default:
16846 llvm_unreachable("Invalid FMA opcode for PowerPC!");
16847 case ISD::FMA:
16848 return PPCISD::FNMSUB;
16849 case PPCISD::FNMSUB:
16850 return ISD::FMA;
16851 }
16852}
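// Quick check of the identity: fnmsub computes -(a*b - c), so with one
// multiplicand negated, (fma -a, b, c) = -(a*b) + c = -(a*b - c) =
// (fnmsub a, b, c); e.g. a=2, b=3, c=1 gives -5 on both sides.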
16853
16854SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16855 bool LegalOps, bool OptForSize,
16856 NegatibleCost &Cost,
16857 unsigned Depth) const {
16858 if (Depth > SelectionDAG::MaxRecursionDepth)
16859 return SDValue();
16860
16861 unsigned Opc = Op.getOpcode();
16862 EVT VT = Op.getValueType();
16863 SDNodeFlags Flags = Op.getNode()->getFlags();
16864
16865 switch (Opc) {
16866 case PPCISD::FNMSUB:
16867 if (!Op.hasOneUse() || !isTypeLegal(VT))
16868 break;
16869
16870 const TargetOptions &Options = getTargetMachine().Options;
16871 SDValue N0 = Op.getOperand(0);
16872 SDValue N1 = Op.getOperand(1);
16873 SDValue N2 = Op.getOperand(2);
16874 SDLoc Loc(Op);
16875
16876 NegatibleCost N2Cost = NegatibleCost::Expensive;
16877 SDValue NegN2 =
16878 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16879
16880 if (!NegN2)
16881 return SDValue();
16882
16883 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16884 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16885 // These transformations may change sign of zeroes. For example,
16886 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16887 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16888 // Try and choose the cheaper one to negate.
16889 NegatibleCost N0Cost = NegatibleCost::Expensive;
16890 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16891 N0Cost, Depth + 1);
16892
16893 NegatibleCost N1Cost = NegatibleCost::Expensive;
16894 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16895 N1Cost, Depth + 1);
16896
16897 if (NegN0 && N0Cost <= N1Cost) {
16898 Cost = std::min(N0Cost, N2Cost);
16899 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16900 } else if (NegN1) {
16901 Cost = std::min(N1Cost, N2Cost);
16902 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16903 }
16904 }
16905
16906 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16907 if (isOperationLegal(ISD::FMA, VT)) {
16908 Cost = N2Cost;
16909 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16910 }
16911
16912 break;
16913 }
16914
16915 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16916 Cost, Depth);
16917}
16918
16919// Override to enable LOAD_STACK_GUARD lowering on Linux.
16920bool PPCTargetLowering::useLoadStackGuardNode() const {
16921 if (!Subtarget.isTargetLinux())
16922 return TargetLowering::useLoadStackGuardNode();
16923 return true;
16924}
16925
16926// Override to disable global variable loading on Linux and insert AIX canary
16927// word declaration.
16928void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16929 if (Subtarget.isAIXABI()) {
16930 M.getOrInsertGlobal(AIXSSPCanaryWordName,
16931 Type::getInt8PtrTy(M.getContext()));
16932 return;
16933 }
16934 if (!Subtarget.isTargetLinux())
16935 return TargetLowering::insertSSPDeclarations(M);
16936}
16937
16938Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
16939 if (Subtarget.isAIXABI())
16940 return M.getGlobalVariable(AIXSSPCanaryWordName);
16941 return TargetLowering::getSDagStackGuard(M);
16942}
16943
16944bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16945 bool ForCodeSize) const {
16946 if (!VT.isSimple() || !Subtarget.hasVSX())
16947 return false;
16948
16949 switch(VT.getSimpleVT().SimpleTy) {
16950 default:
16951 // For FP types that are currently not supported by PPC backend, return
16952 // false. Examples: f16, f80.
16953 return false;
16954 case MVT::f32:
16955 case MVT::f64:
16956 if (Subtarget.hasPrefixInstrs()) {
16957 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
16958 return true;
16959 }
16960 LLVM_FALLTHROUGH;
16961 case MVT::ppcf128:
16962 return Imm.isPosZero();
16963 }
16964}
16965
16966// For vector shift operation op, fold
16967// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16968static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16969 SelectionDAG &DAG) {
16970 SDValue N0 = N->getOperand(0);
16971 SDValue N1 = N->getOperand(1);
16972 EVT VT = N0.getValueType();
16973 unsigned OpSizeInBits = VT.getScalarSizeInBits();
16974 unsigned Opcode = N->getOpcode();
16975 unsigned TargetOpcode;
16976
16977 switch (Opcode) {
16978 default:
16979 llvm_unreachable("Unexpected shift operation");
16980 case ISD::SHL:
16981 TargetOpcode = PPCISD::SHL;
16982 break;
16983 case ISD::SRL:
16984 TargetOpcode = PPCISD::SRL;
16985 break;
16986 case ISD::SRA:
16987 TargetOpcode = PPCISD::SRA;
16988 break;
16989 }
16990
16991 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16992 N1->getOpcode() == ISD::AND)
16993 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16994 if (Mask->getZExtValue() == OpSizeInBits - 1)
16995 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16996
16997 return SDValue();
16998}
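// Illustrative fold (sketch), for v4i32 where OpSizeInBits == 32:
//   (shl %x, (and %y, <31,31,31,31>))  -->  PPCISD::SHL %x, %y
// This is sound because vslw already interprets the shift amount modulo
// the element width, making the explicit mask redundant.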
16999
17000SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17001 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17002 return Value;
17003
17004 SDValue N0 = N->getOperand(0);
17005 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17006 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17007 N0.getOpcode() != ISD::SIGN_EXTEND ||
17008 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17009 N->getValueType(0) != MVT::i64)
17010 return SDValue();
17011
17012 // We can't save an operation here if the value is already extended, and
17013 // the existing shift is easier to combine.
17014 SDValue ExtsSrc = N0.getOperand(0);
17015 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17016 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17017 return SDValue();
17018
17019 SDLoc DL(N0);
17020 SDValue ShiftBy = SDValue(CN1, 0);
17021 // We want the shift amount to be i32 on the extswli, but the shift could
17022 // have an i64.
17023 if (ShiftBy.getValueType() == MVT::i64)
17024 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17025
17026 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17027 ShiftBy);
17028}
17029
17030SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17031 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17032 return Value;
17033
17034 return SDValue();
17035}
17036
17037SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17038 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17039 return Value;
17040
17041 return SDValue();
17042}
17043
17044// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17045// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17046// When C is zero, the equation (addi Z, -C) can be simplified to Z
17047// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17048static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17049 const PPCSubtarget &Subtarget) {
17050 if (!Subtarget.isPPC64())
17051 return SDValue();
17052
17053 SDValue LHS = N->getOperand(0);
17054 SDValue RHS = N->getOperand(1);
17055
17056 auto isZextOfCompareWithConstant = [](SDValue Op) {
17057 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17058 Op.getValueType() != MVT::i64)
17059 return false;
17060
17061 SDValue Cmp = Op.getOperand(0);
17062 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17063 Cmp.getOperand(0).getValueType() != MVT::i64)
17064 return false;
17065
17066 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17067 int64_t NegConstant = 0 - Constant->getSExtValue();
17068 // Due to the limitations of the addi instruction,
17069 // -C is required to be [-32768, 32767].
17070 return isInt<16>(NegConstant);
17071 }
17072
17073 return false;
17074 };
17075
17076 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17077 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17078
17079 // If there is a pattern, canonicalize a zext operand to the RHS.
17080 if (LHSHasPattern && !RHSHasPattern)
17081 std::swap(LHS, RHS);
17082 else if (!LHSHasPattern && !RHSHasPattern)
17083 return SDValue();
17084
17085 SDLoc DL(N);
17086 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17087 SDValue Cmp = RHS.getOperand(0);
17088 SDValue Z = Cmp.getOperand(0);
17089 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17090 int64_t NegConstant = 0 - Constant->getSExtValue();
17091
17092 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17093 default: break;
17094 case ISD::SETNE: {
17095 // when C == 0
17096 // --> addze X, (addic Z, -1).carry
17097 // /
17098 // add X, (zext(setne Z, C))--
17099 // \ when -32768 <= -C <= 32767 && C != 0
17100 // --> addze X, (addic (addi Z, -C), -1).carry
17101 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17102 DAG.getConstant(NegConstant, DL, MVT::i64));
17103 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17104 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17105 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17106 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17107 SDValue(Addc.getNode(), 1));
17108 }
17109 case ISD::SETEQ: {
17110 // when C == 0
17111 // --> addze X, (subfic Z, 0).carry
17112 // /
17113 // add X, (zext(sete Z, C))--
17114 // \ when -32768 <= -C <= 32767 && C != 0
17115 // --> addze X, (subfic (addi Z, -C), 0).carry
17116 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17117 DAG.getConstant(NegConstant, DL, MVT::i64));
17118 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17119 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17120 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17121 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17122 SDValue(Subc.getNode(), 1));
17123 }
17124 }
17125
17126 return SDValue();
17127}
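// Worked case (sketch) for C == 0 and SETNE:
//   (add X, (zext (setne Z, 0)))
//     --> addic rT, rZ, -1   ; CA = 1 iff Z != 0
//         addze rD, rX       ; X plus the carry bit
// which avoids materializing the i1 comparison result in a GPR.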
17128
17129// Transform
17130// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17131// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17132// In this case both C1 and C2 must be known constants.
17133// C1+C2 must fit into a 34 bit signed integer.
17135static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17136 const PPCSubtarget &Subtarget) {
17136 if (!Subtarget.isUsingPCRelativeCalls())
17137 return SDValue();
17138
17139 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17140 // If we find that node try to cast the Global Address and the Constant.
17141 SDValue LHS = N->getOperand(0);
17142 SDValue RHS = N->getOperand(1);
17143
17144 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17145 std::swap(LHS, RHS);
17146
17147 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17148 return SDValue();
17149
17150 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17151 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17152 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17153
17154 // Check that both casts succeeded.
17155 if (!GSDN || !ConstNode)
17156 return SDValue();
17157
17158 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17159 SDLoc DL(GSDN);
17160
17161 // The signed int offset needs to fit in 34 bits.
17162 if (!isInt<34>(NewOffset))
17163 return SDValue();
17164
17165 // The new global address is a copy of the old global address except
17166 // that it has the updated Offset.
17167 SDValue GA =
17168 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17169 NewOffset, GSDN->getTargetFlags());
17170 SDValue MatPCRel =
17171 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17172 return MatPCRel;
17173}
17174
17175SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17176 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17177 return Value;
17178
17179 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17180 return Value;
17181
17182 return SDValue();
17183}
17184
17185// Detect TRUNCATE operations on bitcasts of float128 values.
17186// What we are looking for here is the situation where we extract a subset
17187// of bits from a 128 bit float.
17188// This can be of two forms:
17189// 1) BITCAST of f128 feeding TRUNCATE
17190// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17191// The reason this is required is because we do not have a legal i128 type
17192// and so we want to prevent having to store the f128 and then reload part
17193// of it.
17194SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17195 DAGCombinerInfo &DCI) const {
17196 // If we are using CRBits then try that first.
17197 if (Subtarget.useCRBits()) {
17198 // Check if CRBits did anything and return that if it did.
17199 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17200 return CRTruncValue;
17201 }
17202
17203 SDLoc dl(N);
17204 SDValue Op0 = N->getOperand(0);
17205
17206 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
17207 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
17208 EVT VT = N->getValueType(0);
17209 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17210 return SDValue();
17211 SDValue Sub = Op0.getOperand(0);
17212 if (Sub.getOpcode() == ISD::SUB) {
17213 SDValue SubOp0 = Sub.getOperand(0);
17214 SDValue SubOp1 = Sub.getOperand(1);
17215 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
17216 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
17217 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
17218 SubOp1.getOperand(0),
17219 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
17220 }
17221 }
17222 }
17223
17224 // Looking for a truncate of i128 to i64.
17225 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17226 return SDValue();
17227
17228 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17229
17230 // SRL feeding TRUNCATE.
17231 if (Op0.getOpcode() == ISD::SRL) {
17232 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17233 // The right shift has to be by 64 bits.
17234 if (!ConstNode || ConstNode->getZExtValue() != 64)
17235 return SDValue();
17236
17237 // Switch the element number to extract.
17238 EltToExtract = EltToExtract ? 0 : 1;
17239 // Update Op0 past the SRL.
17240 Op0 = Op0.getOperand(0);
17241 }
17242
17243 // BITCAST feeding a TRUNCATE possibly via SRL.
17244 if (Op0.getOpcode() == ISD::BITCAST &&
17245 Op0.getValueType() == MVT::i128 &&
17246 Op0.getOperand(0).getValueType() == MVT::f128) {
17247 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17248 return DCI.DAG.getNode(
17249 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
17250 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17251 }
17252 return SDValue();
17253}
17254
17255SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17256 SelectionDAG &DAG = DCI.DAG;
17257
17258 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17259 if (!ConstOpOrElement)
17260 return SDValue();
17261
17262 // An imul is usually smaller than the alternative sequence for legal type.
17263 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17264 isOperationLegal(ISD::MUL, N->getValueType(0)))
17265 return SDValue();
17266
17267 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17268 switch (this->Subtarget.getCPUDirective()) {
17269 default:
17270 // TODO: enhance the condition for subtarget before pwr8
17271 return false;
17272 case PPC::DIR_PWR8:
17273 // type mul add shl
17274 // scalar 4 1 1
17275 // vector 7 2 2
17276 return true;
17277 case PPC::DIR_PWR9:
17278 case PPC::DIR_PWR10:
17279 case PPC::DIR_PWR_FUTURE:
17280 // type mul add shl
17281 // scalar 5 2 2
17282 // vector 7 2 2
17283
17284 // The cycle ratios of the related operations are shown in the table
17285 // above. Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl
17286 // all cost 2 for both scalar and vector types, two-instruction patterns
17287 // (add/sub + shl, total 4) are always profitable; but for the
17288 // three-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x),
17289 // sub + add + shl total 6, so we should only do it for vector types.
17290 return IsAddOne && IsNeg ? VT.isVector() : true;
17291 }
17292 };
17293
17294 EVT VT = N->getValueType(0);
17295 SDLoc DL(N);
17296
17297 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17298 bool IsNeg = MulAmt.isNegative();
17299 APInt MulAmtAbs = MulAmt.abs();
17300
17301 if ((MulAmtAbs - 1).isPowerOf2()) {
17302 // (mul x, 2^N + 1) => (add (shl x, N), x)
17303 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17304
17305 if (!IsProfitable(IsNeg, true, VT))
17306 return SDValue();
17307
17308 SDValue Op0 = N->getOperand(0);
17309 SDValue Op1 =
17310 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17311 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17312 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17313
17314 if (!IsNeg)
17315 return Res;
17316
17317 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17318 } else if ((MulAmtAbs + 1).isPowerOf2()) {
17319 // (mul x, 2^N - 1) => (sub (shl x, N), x)
17320 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17321
17322 if (!IsProfitable(IsNeg, false, VT))
17323 return SDValue();
17324
17325 SDValue Op0 = N->getOperand(0);
17326 SDValue Op1 =
17327 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17328 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17329
17330 if (!IsNeg)
17331 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17332 else
17333 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17334
17335 } else {
17336 return SDValue();
17337 }
17338}
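// Worked examples of the decompositions above (illustrative):
//   (mul x, 5)  => (add (shl x, 2), x)              ; 5 = 2^2 + 1
//   (mul x, -5) => (sub 0, (add (shl x, 2), x))
//   (mul x, 7)  => (sub (shl x, 3), x)              ; 7 = 2^3 - 1
//   (mul x, -7) => (sub x, (shl x, 3))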
17339
17340 // Combine an fma-like op (such as fnmsub) with fnegs into the appropriate op.
17341 // Do this in the combiner since we need to check SD flags and other subtarget features.
17342SDValue PPCTargetLowering::combineFMALike(SDNode *N,
17343 DAGCombinerInfo &DCI) const {
17344 SDValue N0 = N->getOperand(0);
17345 SDValue N1 = N->getOperand(1);
17346 SDValue N2 = N->getOperand(2);
17347 SDNodeFlags Flags = N->getFlags();
17348 EVT VT = N->getValueType(0);
17349 SelectionDAG &DAG = DCI.DAG;
17350 const TargetOptions &Options = getTargetMachine().Options;
17351 unsigned Opc = N->getOpcode();
17352 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
17353 bool LegalOps = !DCI.isBeforeLegalizeOps();
17354 SDLoc Loc(N);
17355
17356 if (!isOperationLegal(ISD::FMA, VT))
17357 return SDValue();
17358
17359 // Allowing the transformation to FNMSUB may change the sign of zero when
17360 // a*b-c=0, since (fnmsub a b c) = -(a*b-c) = -0 while c-a*b = +0.
17361 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
17362 return SDValue();
17363
17364 // (fma (fneg a) b c) => (fnmsub a b c)
17365 // (fnmsub (fneg a) b c) => (fma a b c)
17366 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
17367 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
17368
17369 // (fma a (fneg b) c) => (fnmsub a b c)
17370 // (fnmsub a (fneg b) c) => (fma a b c)
17371 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
17372 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
17373
17374 return SDValue();
17375}
17376
17377bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17378 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
17379 if (!Subtarget.is64BitELFABI())
17380 return false;
17381
17382 // If not a tail call then no need to proceed.
17383 if (!CI->isTailCall())
17384 return false;
17385
17386 // If sibling calls have been disabled and tail-calls aren't guaranteed,
17387 // there is no reason to duplicate.
17388 auto &TM = getTargetMachine();
17389 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17390 return false;
17391
17392 // Can't tail call a function called indirectly, or if it has variadic args.
17393 const Function *Callee = CI->getCalledFunction();
17394 if (!Callee || Callee->isVarArg())
17395 return false;
17396
17397 // Make sure the callee and caller calling conventions are eligible for tco.
17398 const Function *Caller = CI->getParent()->getParent();
17399 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17400 CI->getCallingConv()))
17401 return false;
17402
17403 // If the function is local, then we have a good chance at tail-calling it.
17404 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17405}
17406
17407bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
17408 if (!Subtarget.hasVSX())
17409 return false;
17410 if (Subtarget.hasP9Vector() && VT == MVT::f128)
17411 return true;
17412 return VT == MVT::f32 || VT == MVT::f64 ||
17413 VT == MVT::v4f32 || VT == MVT::v2f64;
17414}
17415
17416bool PPCTargetLowering::
17417isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17418 const Value *Mask = AndI.getOperand(1);
17419 // If the mask is suitable for andi. or andis. we should sink the and.
17420 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17421 // Can't handle constants wider than 64-bits.
17422 if (CI->getBitWidth() > 64)
17423 return false;
17424 int64_t ConstVal = CI->getZExtValue();
17425 return isUInt<16>(ConstVal) ||
17426 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17427 }
17428
17429 // For non-constant masks, we can always use the record-form and.
17430 return true;
17431}
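// Illustrative mask values for the test above:
//   0x0000ABCD -> true  (fits a single andi.  with immediate 0xABCD)
//   0xABCD0000 -> true  (fits a single andis. with immediate 0xABCD)
//   0x00ABCD00 -> false (spans both halves, needs more than one instruction)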
17432
17433// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
17434// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
17435// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
17436// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
17437// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
17438SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
17439 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
17440 assert(Subtarget.hasP9Altivec() &&
17441 "Only combine this when P9 altivec supported!");
17442 EVT VT = N->getValueType(0);
17443 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17444 return SDValue();
17445
17446 SelectionDAG &DAG = DCI.DAG;
17447 SDLoc dl(N);
17448 if (N->getOperand(0).getOpcode() == ISD::SUB) {
17449 // This applies even for signed integers: both inputs are zero-extended,
17450 // so they are known to be non-negative when viewed as signed integers.
17451 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
17452 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
17453 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
17454 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
17455 (SubOpcd1 == ISD::ZERO_EXTEND ||
17456 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
17457 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17458 N->getOperand(0)->getOperand(0),
17459 N->getOperand(0)->getOperand(1),
17460 DAG.getTargetConstant(0, dl, MVT::i32));
17461 }
17462
17463 // For type v4i32, it can be optimized with xvnegsp + vabsduw
17464 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
17465 N->getOperand(0).hasOneUse()) {
17466 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17467 N->getOperand(0)->getOperand(0),
17468 N->getOperand(0)->getOperand(1),
17469 DAG.getTargetConstant(1, dl, MVT::i32));
17470 }
17471 }
17472
17473 return SDValue();
17474}
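// For example, if a and b are v8i16 values that are zero-extended from
// narrower elements, (abs (sub a, b)) becomes (VABSD a, b, 0), i.e. a single
// vabsduh: with the sign bit of every element known clear, the unsigned
// absolute difference equals the signed one.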
17475
17476 // For type v4i32/v8i16/v16i8, transform
17477// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
17478// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
17479// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
17480// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
17481SDValue PPCTargetLowering::combineVSelect(SDNode *N,
17482 DAGCombinerInfo &DCI) const {
17483 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
17484 assert(Subtarget.hasP9Altivec() &&
17485 "Only combine this when P9 altivec supported!");
17486
17487 SelectionDAG &DAG = DCI.DAG;
17488 SDLoc dl(N);
17489 SDValue Cond = N->getOperand(0);
17490 SDValue TrueOpnd = N->getOperand(1);
17491 SDValue FalseOpnd = N->getOperand(2);
17492 EVT VT = N->getOperand(1).getValueType();
17493
17494 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
17495 FalseOpnd.getOpcode() != ISD::SUB)
17496 return SDValue();
17497
17498 // VABSD is only available for the types v4i32/v8i16/v16i8.
17499 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17500 return SDValue();
17501
17502 // Require at least one single-use operand so we save a dependent computation.
17503 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
17504 return SDValue();
17505
17506 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17507
17508 // Can only handle unsigned comparison here
17509 switch (CC) {
17510 default:
17511 return SDValue();
17512 case ISD::SETUGT:
17513 case ISD::SETUGE:
17514 break;
17515 case ISD::SETULT:
17516 case ISD::SETULE:
17517 std::swap(TrueOpnd, FalseOpnd);
17518 break;
17519 }
17520
17521 SDValue CmpOpnd1 = Cond.getOperand(0);
17522 SDValue CmpOpnd2 = Cond.getOperand(1);
17523
17524 // SETCC CmpOpnd1 CmpOpnd2 cond
17525 // TrueOpnd = CmpOpnd1 - CmpOpnd2
17526 // FalseOpnd = CmpOpnd2 - CmpOpnd1
17527 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
17528 TrueOpnd.getOperand(1) == CmpOpnd2 &&
17529 FalseOpnd.getOperand(0) == CmpOpnd2 &&
17530 FalseOpnd.getOperand(1) == CmpOpnd1) {
17531 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
17532 CmpOpnd1, CmpOpnd2,
17533 DAG.getTargetConstant(0, dl, MVT::i32));
17534 }
17535
17536 return SDValue();
17537}
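// For example, with v4i32 operands:
//   (vselect (setcc a, b, setugt), (sub a, b), (sub b, a))
// selects the larger-minus-smaller difference, i.e. the unsigned |a - b|,
// so it is replaced by (VABSD a, b, 0), a single vabsduw.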
17538
17539 /// getAddrModeForFlags - Based on the set of address flags, select the
17540 /// optimal instruction format to match by.
17541PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17542 // This is not a node we should be handling here.
17543 if (Flags == PPC::MOF_None)
17544 return PPC::AM_None;
17545 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17546 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17547 if ((Flags & FlagSet) == FlagSet)
17548 return PPC::AM_DForm;
17549 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17550 if ((Flags & FlagSet) == FlagSet)
17551 return PPC::AM_DSForm;
17552 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17553 if ((Flags & FlagSet) == FlagSet)
17554 return PPC::AM_DQForm;
17555 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
17556 if ((Flags & FlagSet) == FlagSet)
17557 return PPC::AM_PrefixDForm;
17558 // If no other forms are selected, return an X-Form as it is the most
17559 // general addressing mode.
17560 return PPC::AM_XForm;
17561}
17562
17563/// Set alignment flags based on whether or not the Frame Index is aligned.
17564/// Utilized when computing flags for address computation when selecting
17565/// load and store instructions.
17566static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17567 SelectionDAG &DAG) {
17568 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17569 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17570 if (!FI)
17571 return;
17572 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17573 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17574 // If this is (add $FI, $S16Imm), the alignment flags are already set
17575 // based on the immediate. We just need to clear the alignment flags
17576 // if the FI alignment is weaker.
17577 if ((FrameIndexAlign % 4) != 0)
17578 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17579 if ((FrameIndexAlign % 16) != 0)
17580 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17581 // If the address is a plain FrameIndex, set alignment flags based on
17582 // FI alignment.
17583 if (!IsAdd) {
17584 if ((FrameIndexAlign % 4) == 0)
17585 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17586 if ((FrameIndexAlign % 16) == 0)
17587 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17588 }
17589}
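// For illustration: a frame object with 8-byte alignment keeps
// MOF_RPlusSImm16Mult4 (8 % 4 == 0) but loses MOF_RPlusSImm16Mult16
// (8 % 16 != 0), so a DS-Form access (displacement must be a multiple of 4)
// remains selectable while a DQ-Form access (multiple of 16) does not.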
17590
17591/// Given a node, compute flags that are used for address computation when
17592/// selecting load and store instructions. The flags computed are stored in
17593 /// FlagSet. This function takes into account whether the node is a constant,
17594 /// an ADD or an OR, and computes the address flags accordingly.
17595static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
17596 SelectionDAG &DAG) {
17597 // Set the alignment flags for the node depending on if the node is
17598 // 4-byte or 16-byte aligned.
17599 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
17600 if ((Imm & 0x3) == 0)
17601 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17602 if ((Imm & 0xf) == 0)
17603 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17604 };
17605
17606 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
17607 // All 32-bit constants can be computed as LIS + Disp.
17608 const APInt &ConstImm = CN->getAPIntValue();
17609 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
17610 FlagSet |= PPC::MOF_AddrIsSImm32;
17611 SetAlignFlagsForImm(ConstImm.getZExtValue());
17612 setAlignFlagsForFI(N, FlagSet, DAG);
17613 }
17614 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
17615 FlagSet |= PPC::MOF_RPlusSImm34;
17616 else // Let constant materialization handle large constants.
17617 FlagSet |= PPC::MOF_NotAddNorCst;
17618 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
17619 // This address can be represented as an addition of:
17620 // - Register + Imm16 (possibly a multiple of 4/16)
17621 // - Register + Imm34
17622 // - Register + PPCISD::Lo
17623 // - Register + Register
17624 // In any case, we won't have to match this as Base + Zero.
17625 SDValue RHS = N.getOperand(1);
17626 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
17627 const APInt &ConstImm = CN->getAPIntValue();
17628 if (ConstImm.isSignedIntN(16)) {
17629 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
17630 SetAlignFlagsForImm(ConstImm.getZExtValue());
17631 setAlignFlagsForFI(N, FlagSet, DAG);
17632 }
17633 if (ConstImm.isSignedIntN(34))
17634 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
17635 else
17636 FlagSet |= PPC::MOF_RPlusR; // Register.
17637 } else if (RHS.getOpcode() == PPCISD::Lo &&
17638 !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
17639 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
17640 else
17641 FlagSet |= PPC::MOF_RPlusR;
17642 } else { // The address computation is not a constant or an addition.
17643 setAlignFlagsForFI(N, FlagSet, DAG);
17644 FlagSet |= PPC::MOF_NotAddNorCst;
17645 }
17646}
17647
17647
17648 static bool isPCRelNode(SDValue N) {
17649 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
17650 isValidPCRelNode<ConstantPoolSDNode>(N) ||
17651 isValidPCRelNode<GlobalAddressSDNode>(N) ||
17652 isValidPCRelNode<JumpTableSDNode>(N) ||
17653 isValidPCRelNode<BlockAddressSDNode>(N));
17654 }
17655
17656 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
17657/// the address flags of the load/store instruction that is to be matched.
17658unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
17659 SelectionDAG &DAG) const {
17660 unsigned FlagSet = PPC::MOF_None;
17661
17662 // Compute subtarget flags.
17663 if (!Subtarget.hasP9Vector())
17664 FlagSet |= PPC::MOF_SubtargetBeforeP9;
17665 else {
17666 FlagSet |= PPC::MOF_SubtargetP9;
17667 if (Subtarget.hasPrefixInstrs())
17668 FlagSet |= PPC::MOF_SubtargetP10;
17669 }
17670 if (Subtarget.hasSPE())
17671 FlagSet |= PPC::MOF_SubtargetSPE;
17672
17673 // Check if we have a PCRel node and return early.
17674 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
17675 return FlagSet;
17676
17677 // If the node is the paired load/store intrinsics, compute flags for
17678 // address computation and return early.
17679 unsigned ParentOp = Parent->getOpcode();
17680 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
17681 (ParentOp == ISD::INTRINSIC_VOID))) {
17682 unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
17683 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
17684 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
17685 ? Parent->getOperand(2)
17686 : Parent->getOperand(3);
17687 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
17688 FlagSet |= PPC::MOF_Vector;
17689 return FlagSet;
17690 }
17691 }
17692
17693 // Mark this as something we don't want to handle here if it is an atomic
17694 // or pre-increment instruction.
17695 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
17696 if (LSB->isIndexed())
17697 return PPC::MOF_None;
17698
17699 // Compute in-memory type flags. This is based on if there are scalars,
17700 // floats or vectors.
17701 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
17702 assert(MN && "Parent should be a MemSDNode!");
17703 EVT MemVT = MN->getMemoryVT();
17704 unsigned Size = MemVT.getSizeInBits();
17705 if (MemVT.isScalarInteger()) {
17706 assert(Size <= 128 &&
17707 "Not expecting scalar integers larger than 16 bytes!");
17708 if (Size < 32)
17709 FlagSet |= PPC::MOF_SubWordInt;
17710 else if (Size == 32)
17711 FlagSet |= PPC::MOF_WordInt;
17712 else
17713 FlagSet |= PPC::MOF_DoubleWordInt;
17714 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
17715 if (Size == 128)
17716 FlagSet |= PPC::MOF_Vector;
17717 else if (Size == 256) {
17718 assert(Subtarget.pairedVectorMemops() &&
17719 "256-bit vectors are only available when paired vector memops is "
17720 "enabled!");
17721 FlagSet |= PPC::MOF_Vector;
17722 } else
17723 llvm_unreachable("Not expecting illegal vectors!");
17724 } else { // Floating point type: can be scalar, f128 or vector types.
17725 if (Size == 32 || Size == 64)
17726 FlagSet |= PPC::MOF_ScalarFloat;
17727 else if (MemVT == MVT::f128 || MemVT.isVector())
17728 FlagSet |= PPC::MOF_Vector;
17729 else
17730 llvm_unreachable("Not expecting illegal scalar floats!");
17731 }
17732
17733 // Compute flags for address computation.
17734 computeFlagsForAddressComputation(N, FlagSet, DAG);
17735
17736 // Compute type extension flags.
17737 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
17738 switch (LN->getExtensionType()) {
17739 case ISD::SEXTLOAD:
17740 FlagSet |= PPC::MOF_SExt;
17741 break;
17742 case ISD::EXTLOAD:
17743 case ISD::ZEXTLOAD:
17744 FlagSet |= PPC::MOF_ZExt;
17745 break;
17746 case ISD::NON_EXTLOAD:
17747 FlagSet |= PPC::MOF_NoExt;
17748 break;
17749 }
17750 } else
17751 FlagSet |= PPC::MOF_NoExt;
17752
17753 // For integers, no extension is the same as zero extension.
17754 // We set the extension mode to zero extension so we don't have
17755 // to add separate entries in AddrModesMap for loads and stores.
17756 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
17757 FlagSet |= PPC::MOF_ZExt;
17758 FlagSet &= ~PPC::MOF_NoExt;
17759 }
17760
17761 // If we don't have prefixed instructions, 34-bit constants should be
17762 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
17763 bool IsNonP1034BitConst =
17764 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
17765 FlagSet) == PPC::MOF_RPlusSImm34;
17766 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
17767 IsNonP1034BitConst)
17768 FlagSet |= PPC::MOF_NotAddNorCst;
17769
17770 return FlagSet;
17771}
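// For illustration: on a Power9 subtarget, a 32-bit zero-extending load from
// (add %r, 40) computes MOF_SubtargetP9 | MOF_WordInt | MOF_ZExt together
// with MOF_RPlusSImm16 | MOF_RPlusSImm16Mult4 | MOF_RPlusSImm34, which
// getAddrModeForFlags() then maps to a D-Form load such as lwz.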
17772
17773/// SelectForceXFormMode - Given the specified address, force it to be
17774/// represented as an indexed [r+r] operation (an XForm instruction).
17775 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
17776 SDValue &Base,
17777 SelectionDAG &DAG) const {
17778
17779 PPC::AddrMode Mode = PPC::AM_XForm;
17780 int16_t ForceXFormImm = 0;
17781 if (provablyDisjointOr(DAG, N) &&
17782 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
17783 Disp = N.getOperand(0);
17784 Base = N.getOperand(1);
17785 return Mode;
17786 }
17787
17788 // If the address is the result of an add, we will utilize the fact that the
17789 // address calculation includes an implicit add. However, we can reduce
17790 // register pressure if we do not materialize a constant just for use as the
17791 // index register. We therefore only fold away the add when it is not an add
17792 // of a value and a 16-bit signed constant where both operands have a single use.
17793 if (N.getOpcode() == ISD::ADD &&
17794 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
17795 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
17796 Disp = N.getOperand(0);
17797 Base = N.getOperand(1);
17798 return Mode;
17799 }
17800
17801 // Otherwise, use R0 as the base register.
17802 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
17803 N.getValueType());
17804 Base = N;
17805
17806 return Mode;
17807}
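// For illustration: for N = (add %r3, %r4), the add itself becomes the
// indexed address (Base = %r4, Disp = %r3), while a lone register N = %r5 is
// matched as (R0, %r5), relying on R0 reading as zero in an indexed address.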
17808
17809 bool PPCTargetLowering::splitValueIntoRegisterParts(
17810 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
17811 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
17812 EVT ValVT = Val.getValueType();
17813 // If we are splitting a scalar integer into f64 parts (i.e. so they
17814 // can be placed into VFRC registers), we need to zero extend and
17815 // bitcast the values. This will ensure the value is placed into a
17816 // VSR using direct moves or stack operations as needed.
17817 if (PartVT == MVT::f64 &&
17818 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
17819 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
17820 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
17821 Parts[0] = Val;
17822 return true;
17823 }
17824 return false;
17825}
17826
17827SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
17828 SelectionDAG &DAG) const {
17829 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17830 TargetLowering::CallLoweringInfo CLI(DAG);
17831 EVT RetVT = Op.getValueType();
17832 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
17833 SDValue Callee =
17834 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
17835 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
17836 TargetLowering::ArgListTy Args;
17837 TargetLowering::ArgListEntry Entry;
17838 for (const SDValue &N : Op->op_values()) {
17839 EVT ArgVT = N.getValueType();
17840 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
17841 Entry.Node = N;
17842 Entry.Ty = ArgTy;
17843 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
17844 Entry.IsZExt = !Entry.IsSExt;
17845 Args.push_back(Entry);
17846 }
17847
17848 SDValue InChain = DAG.getEntryNode();
17849 SDValue TCChain = InChain;
17850 const Function &F = DAG.getMachineFunction().getFunction();
17851 bool isTailCall =
17852 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
17853 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
17854 if (isTailCall)
17855 InChain = TCChain;
17856 CLI.setDebugLoc(SDLoc(Op))
17857 .setChain(InChain)
17858 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
17859 .setTailCall(isTailCall)
17860 .setSExtResult(SignExtend)
17861 .setZExtResult(!SignExtend)
17862 .setIsPostTypeLegalization(true);
17863 return TLI.LowerCallTo(CLI).first;
17864}
17865
17866SDValue PPCTargetLowering::lowerLibCallBasedOnType(
17867 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
17868 SelectionDAG &DAG) const {
17869 if (Op.getValueType() == MVT::f32)
17870 return lowerToLibCall(LibCallFloatName, Op, DAG);
17871
17872 if (Op.getValueType() == MVT::f64)
17873 return lowerToLibCall(LibCallDoubleName, Op, DAG);
17874
17875 return SDValue();
17876}
17877
17878bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
17879 SDNodeFlags Flags = Op.getNode()->getFlags();
17880 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
17881 Flags.hasNoNaNs() && Flags.hasNoInfs();
17882}
17883
17884bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
17885 return Op.getNode()->getFlags().hasApproximateFuncs();
17886}
17887
17888bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
17890}
17891
17892SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
17893 const char *LibCallFloatName,
17894 const char *LibCallDoubleNameFinite,
17895 const char *LibCallFloatNameFinite,
17896 SDValue Op,
17897 SelectionDAG &DAG) const {
17898 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
17899 return SDValue();
17900
17901 if (!isLowringToMASSFiniteSafe(Op))
17902 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
17903 DAG);
17904
17905 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
17906 LibCallDoubleNameFinite, Op, DAG);
17907}
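// For illustration: a pow(x, y) call marked only 'afn' is routed to __xl_pow
// (or __xl_powf for f32), while one also carrying nnan, ninf and nsz is
// routed to the finite variant __xl_pow_finite (see lowerPow() below),
// provided scalar MASS conversion is enabled.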
17908
17909SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
17910 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
17911 "__xl_powf_finite", Op, DAG);
17912}
17913
17914SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
17915 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
17916 "__xl_sinf_finite", Op, DAG);
17917}
17918
17919SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
17920 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
17921 "__xl_cosf_finite", Op, DAG);
17922}
17923
17924SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
17925 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
17926 "__xl_logf_finite", Op, DAG);
17927}
17928
17929SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
17930 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
17931 "__xl_log10f_finite", Op, DAG);
17932}
17933
17934SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
17935 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
17936 "__xl_expf_finite", Op, DAG);
17937}
17938
17939// If we happen to match to an aligned D-Form, check if the Frame Index is
17940// adequately aligned. If it is not, reset the mode to match to X-Form.
17941static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
17942 PPC::AddrMode &Mode) {
17943 if (!isa<FrameIndexSDNode>(N))
17944 return;
17945 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
17946 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
17947 Mode = PPC::AM_XForm;
17948}
17949
17950 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
17951/// compute the address flags of the node, get the optimal address mode based
17952/// on the flags, and set the Base and Disp based on the address mode.
17953 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
17954 SDValue N, SDValue &Disp,
17955 SDValue &Base,
17956 SelectionDAG &DAG,
17957 MaybeAlign Align) const {
17958 SDLoc DL(Parent);
17959
17960 // Compute the address flags.
17961 unsigned Flags = computeMOFlags(Parent, N, DAG);
17962
17963 // Get the optimal address mode based on the Flags.
17964 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
17965
17966 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
17967 // Select an X-Form load if it is not.
17968 setXFormForUnalignedFI(N, Flags, Mode);
17969
17970 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
17971 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
17972 assert(Subtarget.isUsingPCRelativeCalls() &&
17973 "Must be using PC-Relative calls when a valid PC-Relative node is "
17974 "present!");
17975 Mode = PPC::AM_PCRel;
17976 }
17977
17978 // Set Base and Disp accordingly depending on the address mode.
17979 switch (Mode) {
17980 case PPC::AM_DForm:
17981 case PPC::AM_DSForm:
17982 case PPC::AM_DQForm: {
17983 // This is a register plus a 16-bit immediate. The base will be the
17984 // register and the displacement will be the immediate, provided the
17985 // immediate is sufficiently aligned for the access.
17986 if (Flags & PPC::MOF_RPlusSImm16) {
17987 SDValue Op0 = N.getOperand(0);
17988 SDValue Op1 = N.getOperand(1);
17989 int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
17990 if (!Align || isAligned(*Align, Imm)) {
17991 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
17992 Base = Op0;
17993 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
17994 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
17995 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
17996 }
17997 break;
17998 }
17999 }
18000 // This is a register plus the @lo relocation. The base is the register
18001 // and the displacement is the global address.
18002 else if (Flags & PPC::MOF_RPlusLo) {
18002 Disp = N.getOperand(1).getOperand(0); // The global address.
18003 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18004 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18005 Disp.getOpcode() == ISD::TargetConstantPool ||
18006 Disp.getOpcode() == ISD::TargetJumpTable);
18008 Base = N.getOperand(0);
18009 break;
18010 }
18011 // This is a constant address of at most 32 bits. The base will be
18012 // zero or load-immediate-shifted and the displacement will be
18013 // the low 16 bits of the address.
18014 else if (Flags & PPC::MOF_AddrIsSImm32) {
18015 auto *CN = cast<ConstantSDNode>(N);
18016 EVT CNType = CN->getValueType(0);
18017 uint64_t CNImm = CN->getZExtValue();
18018 // If this address fits entirely in a 16-bit sext immediate field, codegen
18019 // this as "d, 0".
18020 int16_t Imm;
18021 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18022 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18023 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18024 CNType);
18025 break;
18026 }
18027 // Handle 32-bit sext immediate with LIS + Addr mode.
18028 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18029 (!Align || isAligned(*Align, CNImm))) {
18030 int32_t Addr = (int32_t)CNImm;
18031 // Otherwise, break this down into LIS + Disp.
18032 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18033 Base =
18034 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18035 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18036 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18037 break;
18038 }
18039 }
18040 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is non-foldable.
18041 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18042 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18043 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18044 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18045 } else
18046 Base = N;
18047 break;
18048 }
18049 case PPC::AM_PrefixDForm: {
18050 int64_t Imm34 = 0;
18051 unsigned Opcode = N.getOpcode();
18052 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18053 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18054 // N is an ADD/OR node whose second operand is a 34-bit signed immediate.
18055 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18056 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18057 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18058 else
18059 Base = N.getOperand(0);
18060 } else if (isIntS34Immediate(N, Imm34)) {
18061 // The address is a 34-bit signed immediate.
18062 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18063 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18064 }
18065 break;
18066 }
18067 case PPC::AM_PCRel: {
18068 // When selecting PC-Relative instructions, "Base" is not utilized as
18069 // we select the address as [PC+imm].
18070 Disp = N;
18071 break;
18072 }
18073 case PPC::AM_None:
18074 break;
18075 default: { // By default, X-Form is always available to be selected.
18076 // When a frame index is not aligned, we also match by XForm.
18077 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18078 Base = FI ? N : N.getOperand(1);
18079 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18080 N.getValueType())
18081 : N.getOperand(0);
18082 break;
18083 }
18084 }
18085 return Mode;
18086}
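// For illustration: an 8-byte scalar load from (add %r3, 64) maps to
// PPC::AM_DSForm (64 is a multiple of 4), giving Base = %r3 and Disp = 64,
// i.e. an "ld"-style instruction; with an offset of 62 the DS-Form alignment
// flag is absent and an X-Form "ldx" is selected instead.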
18087
18088 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18089 bool Return,
18090 bool IsVarArg) const {
18091 switch (CC) {
18092 case CallingConv::Cold:
18093 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF_FIS);
18094 default:
18095 return CC_PPC64_ELF_FIS;
18096 }
18097}
18098
18099 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18100 // TODO: 16-byte atomic type support for AIX is in progress; we should be able
18101 // to inline 16-byte atomic ops on AIX too in the future.
18102 return Subtarget.isPPC64() &&
18103 (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
18104 Subtarget.hasQuadwordAtomics();
18105}
18106
18106
18107 TargetLowering::AtomicExpansionKind
18108 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18109 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18110 if (shouldInlineQuadwordAtomics() && Size == 128)
18111 return AtomicExpansionKind::MaskedIntrinsic;
18112 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18113 }
18114
18115 TargetLowering::AtomicExpansionKind
18116 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18117 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18118 if (shouldInlineQuadwordAtomics() && Size == 128)
18119 return AtomicExpansionKind::MaskedIntrinsic;
18120 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18121 }
18122
18123 static Intrinsic::ID
18124 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18125 switch (BinOp) {
18126 default:
18127 llvm_unreachable("Unexpected AtomicRMW BinOp");
18128 case AtomicRMWInst::Xchg:
18129 return Intrinsic::ppc_atomicrmw_xchg_i128;
18130 case AtomicRMWInst::Add:
18131 return Intrinsic::ppc_atomicrmw_add_i128;
18132 case AtomicRMWInst::Sub:
18133 return Intrinsic::ppc_atomicrmw_sub_i128;
18134 case AtomicRMWInst::And:
18135 return Intrinsic::ppc_atomicrmw_and_i128;
18136 case AtomicRMWInst::Or:
18137 return Intrinsic::ppc_atomicrmw_or_i128;
18138 case AtomicRMWInst::Xor:
18139 return Intrinsic::ppc_atomicrmw_xor_i128;
18140 case AtomicRMWInst::Nand:
18141 return Intrinsic::ppc_atomicrmw_nand_i128;
18142 }
18143}
18144
18145 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18146 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18147 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18148 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18149 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18150 Type *ValTy = Incr->getType();
18151 assert(ValTy->getPrimitiveSizeInBits() == 128);
18152 Function *RMW = Intrinsic::getDeclaration(
18153 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18154 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18155 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18156 Value *IncrHi =
18157 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18158 Value *Addr =
18159 Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18160 Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
18161 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18162 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18163 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18164 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18165 return Builder.CreateOr(
18166 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18167}
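// For illustration, the IR built above for a 128-bit atomic add is roughly:
//   %incr_lo = trunc i128 %incr to i64
//   %hi      = lshr i128 %incr, 64
//   %incr_hi = trunc i128 %hi to i64
//   %lohi    = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(
//                  i8* %addr, i64 %incr_lo, i64 %incr_hi)
// after which the two halves are zero-extended, shifted, and or'ed back
// together into the i128 result.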
18168
18169 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18170 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18171 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18172 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18173 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18174 Type *ValTy = CmpVal->getType();
18175 assert(ValTy->getPrimitiveSizeInBits() == 128);
18176 Function *IntCmpXchg =
18177 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18178 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18179 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18180 Value *CmpHi =
18181 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18182 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18183 Value *NewHi =
18184 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18185 Value *Addr =
18186 Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18187 emitLeadingFence(Builder, CI, Ord);
18188 Value *LoHi =
18189 Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
18190 emitTrailingFence(Builder, CI, Ord);
18191 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18192 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18193 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18194 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18195 return Builder.CreateOr(
18196 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18197}
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition Compiler.h:280
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Align
uint64_t Offset
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
lazy value info
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static bool isConstantOrUndef(const SDValue Op)
Module.h This file contains the declarations for the Module class.
This file provides None, an enumerator for use in implicit constructors of various (usually templated...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool isFunctionGlobalAddress(SDValue Callee)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static cl::opt< bool > EnableQuadwordAtomics("ppc-quadword-atomics", cl::desc("enable quadword lock-free atomic operations"), cl::init(false), cl::Hidden)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(VerifyEach)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
const char LLVMTargetMachineRef TM
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static bool isSplat(ArrayRef< Value * > VL)
This file contains some templates that are useful if you are working with the STL at all.
Shadow Stack GC Lowering
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:190
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:464
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:4843
bool isDenormal() const
Definition APFloat.h:1216
APInt bitcastToAPInt() const
Definition APFloat.h:1130
Class for arbitrary precision integers.
Definition APInt.h:75
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:214
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1357
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:434
APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:973
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1466
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1280
APInt abs() const
Get the absolute value.
Definition APInt.h:1686
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:312
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:420
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:452
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1620
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:425
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:289
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:279
This class represents an incoming formal argument to a Function.
Definition Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Definition ArrayRef.h:164
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ Nand
*p = ~(old & v)
BinOp getOperation() const
This is an SDNode representing atomic operations.
This class holds the attributes for a function, its return value, and its parameters.
Definition Attributes.h:425
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:104
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition Constants.h:849
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
bool needsCustom() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
unsigned getValNo() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:257
This is the shared class of boolean and integer constants.
Definition Constants.h:79
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:113
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:244
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
A debug info location.
Definition DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:152
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:209
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:65
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:664
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:654
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:661
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:238
const Function & getFunction() const
Definition Function.h:135
arg_iterator arg_begin()
Definition Function.h:741
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:320
size_t arg_size() const
Definition Function.h:774
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:628
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:551
StringRef getSection() const
Definition Globals.cpp:171
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
bool hasComdat() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:93
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition InlineAsm.h:355
static unsigned getKind(unsigned Flags)
Definition InlineAsm.h:344
const BasicBlock * getParent() const
Definition Instruction.h:91
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Context object for machine code objects.
Definition MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:24
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:386
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
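A small sketch of the MVT queries above (the specific types are examples):

#include "llvm/Support/MachineValueType.h"

void mvtExample() {
  llvm::MVT VT = llvm::MVT::v4i32;
  // 4 lanes of i32: 128 bits total.
  unsigned Lanes = VT.isVector() ? VT.getVectorNumElements() : 1; // 4
  uint64_t Bits = VT.getFixedSizeInBits();                        // 128
  llvm::MVT I64 = llvm::MVT::getIntegerVT(64);                    // MVT::i64
  (void)Lanes; (void)Bits; (void)I64;
}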
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
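A sketch of the MachineFrameInfo calls above, in the shape argument lowering typically uses them; the sizes and offsets are illustrative:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

static int createArgObjects(llvm::MachineFunction &MF) {
  llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
  // A fixed object models an incoming stack argument at SP+48.
  int ArgFI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/48,
                                    /*IsImmutable=*/true);
  // A statically sized object models a spill slot placed by frame layout.
  int SpillFI = MFI.CreateStackObject(/*Size=*/16, llvm::Align(16),
                                      /*isSpillSlot=*/true);
  (void)MFI.getObjectSize(SpillFI); // 16
  return ArgFI;
}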
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
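The builder methods above chain off BuildMI(); a hedged fragment in the style of this file's custom inserters (all names are assumed to be supplied by the caller, and PPC::ADDI8 stands in for whatever opcode is being emitted):

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

static void emitAddImm(llvm::MachineBasicBlock &MBB,
                       llvm::MachineBasicBlock::iterator InsertPt,
                       const llvm::DebugLoc &DL,
                       const llvm::TargetInstrInfo *TII,
                       llvm::Register Dst, llvm::Register Src) {
  // Dst = Src + 16, built operand by operand. PPC::ADDI8 comes from the
  // tablegen-generated PPC opcode enum available in this backend.
  llvm::BuildMI(MBB, InsertPt, DL, TII->get(PPC::ADDI8), Dst)
      .addReg(Src)
      .addImm(16);
}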
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by the operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:65
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition Module.cpp:588
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register (64-bit SVR4 ABI only).
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
bool useLongCalls() const
bool hasFRSQRTE() const
bool is32BitELFABI() const
bool hasMMA() const
unsigned descriptorTOCAnchorOffset() const
bool hasFPCVT() const
bool isAIXABI() const
bool useSoftFloat() const
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
bool hasAltivec() const
bool allowsUnalignedFPAccess() const
const PPCFrameLowering * getFrameLowering() const override
bool needsSwapsForVSXMemOps() const
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool needsTwoConstNR() const
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
bool hasQuadwordAtomics() const
bool hasFSQRT() const
bool hasP9Vector() const
bool hasFRE() const
bool hasFRSQRTES() const
MCRegister getEnvironmentPointerRegister() const
bool hasFloat128() const
const PPCInstrInfo * getInstrInfo() const override
bool hasFPU() const
bool hasP10Vector() const
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
bool hasRecipPrec() const
bool hasSTFIWX() const
bool isSVR4ABI() const
bool hasInvariantFunctionDescriptors() const
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
POPCNTDKind hasPOPCNTD() const
bool hasEFPU2() const
bool hasPrefixInstrs() const
bool hasPartwordAtomics() const
bool hasSPE() const
bool hasLFIWAX() const
bool isLittleEndian() const
bool hasFCPSGN() const
bool isTargetLinux() const
bool hasP9Altivec() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
bool is64BitELFABI() const
bool hasFPRND() const
bool isELFv2ABI() const
bool hasP8Vector() const
bool pairedVectorMemops() const
const PPCTargetMachine & getTargetMachine() const
bool isPredictableSelectIsExpensive() const
bool enableMachineScheduler() const override
Scheduling customization.
bool hasFRES() const
bool isISA3_1() const
bool hasLDBRX() const
const PPCRegisterInfo * getRegisterInfo() const override
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
const Triple & getTargetTriple() const
unsigned descriptorEnvironmentPointerOffset() const
bool isISA3_0() const
bool hasVSX() const
bool hasDirectMove() const
bool hasP8Altivec() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getStackProbeSize(MachineFunction &MF) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, Optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
bool hasInlineStackProbe(MachineFunction &MF) const override
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=None) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Check whether the specified address can be represented PC-relative, as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:71
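A short sketch of the Register queries above:

#include "llvm/CodeGen/Register.h"

static bool needsVRegAllocation(llvm::Register R) {
  // Virtual register numbers live in a separate (high-bit) namespace from
  // physical registers, so a single Register can name either kind.
  return R.isVirtual(); // equivalently: llvm::Register::isVirtualRegister(R)
}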
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
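A sketch of the SDNode/SDValue traversal members above, shaped like a guard in a DAG combine (the helper names are hypothetical):

#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isAddWithOneUse(llvm::SDValue V) {
  // SDValue::hasOneUse() counts users of this particular result value.
  return V.getOpcode() == llvm::ISD::ADD && V.hasOneUse();
}

static bool allUsersAreStores(llvm::SDValue V) {
  // Note: uses() visits users of every result of the node, not just V.
  for (llvm::SDNode *User : V->uses())
    if (User->getOpcode() != llvm::ISD::STORE)
      return false;
  return true;
}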
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
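A sketch of the known-bits queries above, assuming an i64 SDValue Op and a SelectionDAG &DAG in scope (as inside a combine):

// Prove the low two bits of Op are zero before using an aligned form.
llvm::APInt LowBits = llvm::APInt::getLowBitsSet(/*numBits=*/64,
                                                 /*loBitsSet=*/2);
if (DAG.MaskedValueIsZero(Op, LowBits)) {
  // Op is provably a multiple of 4 here, so a 4-byte-aligned addressing
  // form would be safe.
}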
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
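A hedged sketch of building nodes with the SelectionDAG factory methods above, in the shape of a LowerOperation helper; it is purely illustrative, not this target's lowering:

static llvm::SDValue lowerWidenedAdd(llvm::SelectionDAG &DAG,
                                     const llvm::SDLoc &dl,
                                     llvm::SDValue A, llvm::SDValue B) {
  llvm::EVT VT = A.getValueType();
  llvm::SDValue Sum = DAG.getNode(llvm::ISD::ADD, dl, VT, A, B);
  // Widen to i64, zero-extending or truncating as needed.
  return DAG.getZExtOrTrunc(Sum, dl, llvm::MVT::i64);
}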
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:136
const_iterator begin() const
Definition SmallSet.h:225
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:166
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:182
const_iterator end() const
Definition SmallSet.h:231
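A minimal usage sketch for the SmallSet members above:

#include "llvm/ADT/SmallSet.h"

void smallSetExample() {
  llvm::SmallSet<int, 8> Visited; // allocation-free up to 8 elements
  bool Inserted = Visited.insert(42).second; // true on first insertion
  (void)Inserted;
  if (Visited.count(42)) {
    // already seen
  }
}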
LLVM_NODISCARD bool empty() const
Definition SmallVector.h:73
size_t size() const
Definition SmallVector.h:70
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
LLVM_NODISCARD T pop_back_val()
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition TypeSize.h:134
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:58
LLVM_NODISCARD constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:157
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:514
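A small sketch of getAsInteger(); note the inverted return convention:

#include "llvm/ADT/StringRef.h"

void parseExample() {
  llvm::StringRef S("-128");
  int64_t N = 0;
  // getAsInteger() returns true on *failure*, so success is the false branch.
  if (!S.getAsInteger(/*Radix=*/10, N)) {
    // N == -128 here.
  }
}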
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:149
A switch()-like statement whose cases are string literals.
LLVM_NODISCARD R Default(T Value)
StringSwitch & Case(StringLiteral S, T Value)
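A minimal StringSwitch sketch (the register names and values are illustrative):

#include "llvm/ADT/StringSwitch.h"

static int registerNumber(llvm::StringRef Name) {
  // Cases are tried in order; Default() terminates the chain.
  return llvm::StringSwitch<int>(Name)
      .Case("r0", 0)
      .Case("r1", 1)
      .Default(-1);
}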
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
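A fragment in the style of a TargetLowering constructor showing setOperationAction; the opcode/type pairs are illustrative only, not this target's actual configuration:

// Inside a TargetLowering subclass constructor:
setOperationAction(ISD::SDIV, MVT::i64, Expand);  // legalizer expands it
setOperationAction(ISD::ADD, MVT::v4i32, Legal);  // selected directly
setOperationAction(ISD::FMA, MVT::f64, Custom);   // routed to LowerOperation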
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:83
static TypeSize Fixed(ScalarTy MinVal)
Definition TypeSize.h:441
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition Type.cpp:291
static IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:240
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:222
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:164
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:196
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
A Use represents the edge between a Value definition and its users.
Definition Use.h:43
Value * getOperand(unsigned i) const
Definition User.h:169
unsigned getNumOperands() const
Definition User.h:191
LLVM Value Representation.
Definition Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
User * user_back()
Definition Value.h:407
self_iterator getIterator()
Definition ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:42
@ C
C - The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:736
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:236
@ TargetConstantPool
Definition ISDOpcodes.h:168
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:475
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ FLT_ROUNDS_
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:863
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:250
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:700
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:269
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:239
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:766
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:482
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:199
@ GlobalAddress
Definition ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:773
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:542
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:674
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:255
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:229
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:411
@ GlobalTLSAddress
Definition ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:760
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:449
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:606
@ TargetExternalSymbol
Definition ISDOpcodes.h:169
@ TargetJumpTable
Definition ISDOpcodes.h:167
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:713
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:222
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:164
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:637
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:691
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:586
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:572
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:534
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:763
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:728
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:781
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:660
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:722
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:448
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:442
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:464
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:441
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:819
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:469
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:666
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:523
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:852
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:814
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:422
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:769
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:749
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:492
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:340
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:165
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:514
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
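A minimal usage sketch (the choice of llvm.ctlz and the helper name are illustrative assumptions, not taken from this file): the overload is resolved by the types passed in Tys, and the returned Function is callable through IRBuilder.

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Hypothetical helper: declare the overloaded llvm.ctlz intrinsic for the
  // operand's type and emit a call to it.
  static Value *emitCtlz(Module &M, IRBuilder<> &B, Value *V) {
    Function *Decl =
        Intrinsic::getDeclaration(&M, Intrinsic::ctlz, {V->getType()});
    // Second operand (i1): whether a zero input is poison; false here.
    return B.CreateCall(Decl, {V, B.getFalse()});
  }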
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
@ VecShuffle
Definition NVPTX.h:88
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:154
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set, the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:139
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:113
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set, the symbol reference is to be computed via the GOT.
Definition PPC.h:118
@ MO_TPREL_HA
Definition PPC.h:164
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition PPC.h:105
@ MO_TLS
Definition PPC.h:173
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the TLS Initial Exec model.
Definition PPC.h:131
@ MO_TPREL_LO
Definition PPC.h:163
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:160
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:149
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set, the symbol reference is relative to the TLS General Dynamic model for ...
Definition PPC.h:127
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:144
@ MO_HA
Definition PPC.h:161
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:109
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ VABSD
An SDNode for Power9 vector absolute value difference.
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - exactly the same as the target-independent nodes, except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC - Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed block by block, and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of two consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY; G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents a v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
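A hedged sketch of how lowering code typically consults the shuffle-mask predicates above (SVN and DAG are assumed to come from the surrounding LowerVECTOR_SHUFFLE context, with the backend's own headers in scope; ShuffleKind 0 denotes a big-endian shuffle with two distinct inputs):

  // Sketch only: probe two of the predicates and recover the parameters
  // they report.
  static bool classifyShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
    if (PPC::isSplatShuffleMask(SVN, /*EltSize=*/4)) {
      // Splat of a single word: the index feeds a VSPLTW-style mnemonic.
      unsigned SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVN, 4, DAG);
      return SplatIdx < 4;
    }
    // isVSLDOIShuffleMask returns the shift amount, or -1 if no match.
    int SH = PPC::isVSLDOIShuffleMask(SVN, /*ShuffleKind=*/0, DAG);
    return SH >= 0;
  }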
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ XMC_PR
Program Code.
Definition XCOFF.h:90
@ XTY_ER
External reference.
Definition XCOFF.h:226
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
constexpr bool isUInt< 16 >(uint64_t x)
Definition MathExtras.h:408
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1617
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:363
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_NODISCARD bool isa_and_nonnull(const Y &Val)
Definition Casting.h:677
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
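A hedged sketch combining this helper with isNullConstant above (V is an assumed SDValue from a combine):

  // Look through any bitcasts before testing for the integer zero constant.
  SDValue Src = peekThroughBitcasts(V);
  if (isNullConstant(Src)) {
    // The operand is a (possibly bitcast) zero; fold accordingly.
  }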
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:146
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
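As a hedged illustration of how this predicate is typically used when forming base+displacement addresses (the helper name and surrounding selection logic are assumptions, with the backend's headers in scope):

  // Sketch: accept ADD(base, imm) when the immediate fits in a signed
  // 16-bit displacement; Disp receives the sign-extended value.
  static bool isBasePlusDisp16(SDValue Addr, int16_t &Disp) {
    return Addr.getOpcode() == ISD::ADD &&
           isIntS16Immediate(Addr.getOperand(1).getNode(), Disp);
  }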
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
LLVM_NODISCARD decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_NODISCARD decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:597
constexpr bool isInt< 32 >(int64_t x)
Definition MathExtras.h:373
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:496
constexpr bool isInt< 16 >(int64_t x)
Definition MathExtras.h:370
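Worked values for these range predicates; since all of them are constexpr, the claims can be checked at compile time:

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  static_assert(isInt<16>(32767) && !isInt<16>(32768), "signed 16-bit range");
  static_assert(isUInt<16>(65535) && !isUInt<16>(65536), "unsigned 16-bit range");
  static_assert(isPowerOf2_64(uint64_t(1) << 40) && !isPowerOf2_64(0),
                "powers of two are > 0");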
LLVM_NODISCARD bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition MathExtras.h:690
unsigned M1(unsigned Val)
Definition VE.h:370
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:754
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1624
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition MathExtras.h:722
const NoneType None
Definition None.h:24
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:491
bool convertToNonDenormSingle(APInt &ArgAPInt)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition MathExtras.h:156
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:163
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition ArrayRef.h:475
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:145
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
FunctionAddr Value
Definition InstrProf.h:113
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if the value of the given node can be accurately represented as a sign ...
@ Z
zlib-style compression
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:156
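A small worked example of the alignment arithmetic (values chosen for illustration):

  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  uint64_t Padded = alignTo(13, Align(8)); // rounds 13 up to 16
  bool Ok = isAligned(Align(8), Padded);   // true: 16 is a multiple of 8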
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1709
unsigned M0(unsigned Val)
Definition VE.h:369
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
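A hedged sketch of the usual pattern in a DAG combine, where the same code path then handles both a scalar shift amount and a splat-vector one (N is an assumed ISD::SHL node):

  if (ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1))) {
    // ShAmt is uniform across all lanes (or scalar); fold accordingly.
    uint64_t ShAmt = C->getZExtValue();
    (void)ShAmt;
  }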
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N-bit number shifted left by S.
Definition MathExtras.h:379
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:801
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned BitWidth
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:213
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:817
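Worked values, since both helpers are constexpr (the bottom B bits are reinterpreted as a signed quantity):

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  static_assert(SignExtend32<16>(0x8000u) == -32768, "sign bit propagates");
  static_assert(SignExtend64<16>(0x7FFFu) == 32767, "positive values unchanged");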
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:417
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:170
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:190
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition APFloat.cpp:185
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:362
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:129
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:256
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:139
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:340
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:352
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:288
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:348
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:154
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:295
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:300
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:134
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:149
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:420
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:144
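A hedged sketch of common EVT queries during lowering (Op is an assumed SDValue from the surrounding code):

  EVT VT = Op.getValueType();
  if (VT.isVector() && VT.getVectorElementType() == MVT::f32) {
    // e.g. v4f32 -> v4i32 before operating on the raw FP bit patterns.
    EVT IntVT = VT.changeVectorElementTypeToInteger();
    unsigned NumElts = VT.getVectorNumElements(); // 4 for v4f32
    (void)IntVT; (void)NumElts;
  }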
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:66
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:57
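A hedged sketch of the typical fold these accessors enable (DAG and Val are assumed from the surrounding combine):

  KnownBits Known = DAG.computeKnownBits(Val);
  if (Known.isConstant()) {
    // Every bit is known: the value can be rematerialized as a constant.
    const APInt &C = Known.getConstant();
    (void)C;
  }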
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
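A hedged sketch of constructing these records (MF and FI are an assumed MachineFunction and frame index):

  MachinePointerInfo SlotPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachinePointerInfo HiPI = SlotPI.getWithOffset(8); // second doubleword
  MachinePointerInfo SPPI = MachinePointerInfo::getStack(MF, /*Offset=*/32);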
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
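A hedged sketch of the chained-setter style these members support (Chain, Callee, RetTy, Args, dl, DAG, and TLI are all assumed from the lowering context):

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult();
  // LowerCallTo returns {call result, output chain}.
  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);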
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)